Kstars

csv.h
1// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
2// License: BSD-3
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are met:
8//
9// 1. Redistributions of source code must retain the above copyright notice,
10// this list of conditions and the following disclaimer.
11//
12// 2. Redistributions in binary form must reproduce the above copyright notice,
13// this list of conditions and the following disclaimer in the documentation
14// and/or other materials provided with the distribution.
15//
16// 3. Neither the name of the copyright holder nor the names of its contributors
17// may be used to endorse or promote products derived from this software
18// without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30// POSSIBILITY OF SUCH DAMAGE.
31
32#ifndef CSV_H
33#define CSV_H
34
35#include <algorithm>
36#include <cstdio>
37#include <cstring>
38#include <exception>
39#include <string>
40#include <utility>
41#include <vector>
42#ifndef CSV_IO_NO_THREAD
43#include <condition_variable>
44#include <mutex>
45#include <thread>
46#endif
47#include <cassert>
48#include <cerrno>
49#include <istream>
50#include <limits>
51#include <memory>
52
53namespace io {
54////////////////////////////////////////////////////////////////////////////
55// LineReader //
56////////////////////////////////////////////////////////////////////////////
57
58namespace error {
// Root of every exception thrown by this header. The message text is not
// stored up front: derived types render it into error_message_buffer each
// time what() is called.
struct base : std::exception {
  // Renders the human readable message into error_message_buffer.
  virtual void format_error_message() const = 0;

  // Formats the message on demand and hands back the internal buffer.
  const char *what() const noexcept override {
    this->format_error_message();
    return &error_message_buffer[0];
  }

  // mutable because the const what() has to write into it.
  mutable char error_message_buffer[2048];
};
69
// Capacity (excluding the terminator) of the file name kept inside error
// objects; this only affects the file name in the error message.
const int max_file_name_length = 1024;

// Mixin that stores a possibly truncated, always NUL-terminated file name.
struct with_file_name {
  with_file_name() { std::memset(file_name, '\0', sizeof(file_name)); }

  // Copies file_name into the object, truncating it to the buffer size.
  // A null pointer results in an empty name.
  void set_file_name(const char *file_name) {
    if (file_name == nullptr) {
      this->file_name[0] = '\0';
      return;
    }
    // The parentheses around the strncpy call silence the
    // GCC -Wstringop-truncation warning.
    (strncpy(this->file_name, file_name, sizeof(this->file_name)));
    this->file_name[sizeof(this->file_name) - 1] = '\0';
  }

  char file_name[max_file_name_length + 1];
};
89
// Mixin that stores the line number an error refers to; -1 until it is set.
struct with_file_line {
  with_file_line() : file_line(-1) {}

  void set_file_line(int file_line) { (*this).file_line = file_line; }

  int file_line;
};
97
// Mixin that stores an errno value; 0 until it is set.
struct with_errno {
  with_errno() : errno_value(0) {}

  void set_errno(int errno_value) { (*this).errno_value = errno_value; }

  int errno_value;
};
105
106struct can_not_open_file : base, with_file_name, with_errno {
107 void format_error_message() const override {
108 if (errno_value != 0)
109 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
110 "Can not open file \"%s\" because \"%s\".", file_name,
111 std::strerror(errno_value));
112 else
113 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
114 "Can not open file \"%s\".", file_name);
115 }
116};
117
118struct line_length_limit_exceeded : base, with_file_name, with_file_line {
119 void format_error_message() const override {
120 std::snprintf(
121 error_message_buffer, sizeof(error_message_buffer),
122 "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1.",
123 file_line, file_name);
124 }
125};
126} // namespace error
127
// Abstract source of raw bytes consumed by LineReader.
class ByteSourceBase {
public:
  virtual ~ByteSourceBase() = default;

  // Reads up to size bytes into buffer and returns the number of bytes
  // actually read.
  virtual int read(char *buffer, int size) = 0;
};
133
134namespace detail {
135
136class OwningStdIOByteSourceBase : public ByteSourceBase {
137public:
138 explicit OwningStdIOByteSourceBase(FILE *file) : file(file) {
139 // Tell the std library that we want to do the buffering ourself.
140 std::setvbuf(file, 0, _IONBF, 0);
141 }
142
143 int read(char *buffer, int size) { return std::fread(buffer, 1, size, file); }
144
145 ~OwningStdIOByteSourceBase() { std::fclose(file); }
146
147private:
148 FILE *file;
149};
150
151class NonOwningIStreamByteSource : public ByteSourceBase {
152public:
153 explicit NonOwningIStreamByteSource(std::istream &in) : in(in) {}
154
155 int read(char *buffer, int size) {
156 in.read(buffer, size);
157 return in.gcount();
158 }
159
160 ~NonOwningIStreamByteSource() {}
161
162private:
163 std::istream &in;
164};
165
166class NonOwningStringByteSource : public ByteSourceBase {
167public:
168 NonOwningStringByteSource(const char *str, long long size)
169 : str(str), remaining_byte_count(size) {}
170
171 int read(char *buffer, int desired_byte_count) {
172 int to_copy_byte_count = desired_byte_count;
173 if (remaining_byte_count < to_copy_byte_count)
174 to_copy_byte_count = remaining_byte_count;
175 std::memcpy(buffer, str, to_copy_byte_count);
176 remaining_byte_count -= to_copy_byte_count;
177 str += to_copy_byte_count;
178 return to_copy_byte_count;
179 }
180
181 ~NonOwningStringByteSource() {}
182
183private:
184 const char *str;
185 long long remaining_byte_count;
186};
187
188#ifndef CSV_IO_NO_THREAD
189class AsynchronousReader {
190public:
191 void init(std::unique_ptr<ByteSourceBase> arg_byte_source) {
192 std::unique_lock<std::mutex> guard(lock);
193 byte_source = std::move(arg_byte_source);
194 desired_byte_count = -1;
195 termination_requested = false;
196 worker = std::thread([&] {
197 std::unique_lock<std::mutex> guard(lock);
198 try {
199 for (;;) {
200 read_requested_condition.wait(guard, [&] {
201 return desired_byte_count != -1 || termination_requested;
202 });
203 if (termination_requested)
204 return;
205
206 read_byte_count = byte_source->read(buffer, desired_byte_count);
207 desired_byte_count = -1;
208 if (read_byte_count == 0)
209 break;
210 read_finished_condition.notify_one();
211 }
212 } catch (...) {
213 read_error = std::current_exception();
214 }
215 read_finished_condition.notify_one();
216 });
217 }
218
219 bool is_valid() const { return byte_source != nullptr; }
220
221 void start_read(char *arg_buffer, int arg_desired_byte_count) {
222 std::unique_lock<std::mutex> guard(lock);
223 buffer = arg_buffer;
224 desired_byte_count = arg_desired_byte_count;
225 read_byte_count = -1;
226 read_requested_condition.notify_one();
227 }
228
229 int finish_read() {
230 std::unique_lock<std::mutex> guard(lock);
231 read_finished_condition.wait(
232 guard, [&] { return read_byte_count != -1 || read_error; });
233 if (read_error)
234 std::rethrow_exception(read_error);
235 else
236 return read_byte_count;
237 }
238
239 ~AsynchronousReader() {
240 if (byte_source != nullptr) {
241 {
242 std::unique_lock<std::mutex> guard(lock);
243 termination_requested = true;
244 }
245 read_requested_condition.notify_one();
246 worker.join();
247 }
248 }
249
250private:
251 std::unique_ptr<ByteSourceBase> byte_source;
252
253 std::thread worker;
254
255 bool termination_requested;
256 std::exception_ptr read_error;
257 char *buffer;
258 int desired_byte_count;
259 int read_byte_count;
260
261 std::mutex lock;
262 std::condition_variable read_finished_condition;
263 std::condition_variable read_requested_condition;
264};
265#endif
266
267class SynchronousReader {
268public:
269 void init(std::unique_ptr<ByteSourceBase> arg_byte_source) {
270 byte_source = std::move(arg_byte_source);
271 }
272
273 bool is_valid() const { return byte_source != nullptr; }
274
275 void start_read(char *arg_buffer, int arg_desired_byte_count) {
276 buffer = arg_buffer;
277 desired_byte_count = arg_desired_byte_count;
278 }
279
280 int finish_read() { return byte_source->read(buffer, desired_byte_count); }
281
282private:
283 std::unique_ptr<ByteSourceBase> byte_source;
284 char *buffer;
285 int desired_byte_count;
286};
287} // namespace detail
288
289class LineReader {
290private:
291 static const int block_len = 1 << 20;
292 std::unique_ptr<char[]> buffer; // must be constructed before (and thus
293 // destructed after) the reader!
294#ifdef CSV_IO_NO_THREAD
295 detail::SynchronousReader reader;
296#else
297 detail::AsynchronousReader reader;
298#endif
299 int data_begin;
300 int data_end;
301
302 char file_name[error::max_file_name_length + 1];
303 unsigned file_line;
304
305 static std::unique_ptr<ByteSourceBase> open_file(const char *file_name) {
306 // We open the file in binary mode as it makes no difference under *nix
307 // and under Windows we handle \r\n newlines ourself.
308 FILE *file = std::fopen(file_name, "rb");
309 if (file == 0) {
310 int x = errno; // store errno as soon as possible, doing it after
311 // constructor call can fail.
312 error::can_not_open_file err;
313 err.set_errno(x);
314 err.set_file_name(file_name);
315 throw err;
316 }
317 return std::unique_ptr<ByteSourceBase>(
318 new detail::OwningStdIOByteSourceBase(file));
319 }
320
321 void init(std::unique_ptr<ByteSourceBase> byte_source) {
322 file_line = 0;
323
324 buffer = std::unique_ptr<char[]>(new char[3 * block_len]);
325 data_begin = 0;
326 data_end = byte_source->read(buffer.get(), 2 * block_len);
327
328 // Ignore UTF-8 BOM
329 if (data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' &&
330 buffer[2] == '\xBF')
331 data_begin = 3;
332
333 if (data_end == 2 * block_len) {
334 reader.init(std::move(byte_source));
335 reader.start_read(buffer.get() + 2 * block_len, block_len);
336 }
337 }
338
339public:
340 LineReader() = delete;
341 LineReader(const LineReader &) = delete;
342 LineReader &operator=(const LineReader &) = delete;
343
344 explicit LineReader(const char *file_name) {
345 set_file_name(file_name);
346 init(open_file(file_name));
347 }
348
349 explicit LineReader(const std::string &file_name) {
350 set_file_name(file_name.c_str());
351 init(open_file(file_name.c_str()));
352 }
353
354 LineReader(const char *file_name,
355 std::unique_ptr<ByteSourceBase> byte_source) {
356 set_file_name(file_name);
357 init(std::move(byte_source));
358 }
359
360 LineReader(const std::string &file_name,
361 std::unique_ptr<ByteSourceBase> byte_source) {
362 set_file_name(file_name.c_str());
363 init(std::move(byte_source));
364 }
365
366 LineReader(const char *file_name, const char *data_begin,
367 const char *data_end) {
368 set_file_name(file_name);
369 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(
370 data_begin, data_end - data_begin)));
371 }
372
373 LineReader(const std::string &file_name, const char *data_begin,
374 const char *data_end) {
375 set_file_name(file_name.c_str());
376 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(
377 data_begin, data_end - data_begin)));
378 }
379
380 LineReader(const char *file_name, FILE *file) {
381 set_file_name(file_name);
382 init(std::unique_ptr<ByteSourceBase>(
383 new detail::OwningStdIOByteSourceBase(file)));
384 }
385
386 LineReader(const std::string &file_name, FILE *file) {
387 set_file_name(file_name.c_str());
388 init(std::unique_ptr<ByteSourceBase>(
389 new detail::OwningStdIOByteSourceBase(file)));
390 }
391
392 LineReader(const char *file_name, std::istream &in) {
393 set_file_name(file_name);
394 init(std::unique_ptr<ByteSourceBase>(
395 new detail::NonOwningIStreamByteSource(in)));
396 }
397
398 LineReader(const std::string &file_name, std::istream &in) {
399 set_file_name(file_name.c_str());
400 init(std::unique_ptr<ByteSourceBase>(
401 new detail::NonOwningIStreamByteSource(in)));
402 }
403
404 void set_file_name(const std::string &file_name) {
405 set_file_name(file_name.c_str());
406 }
407
408 void set_file_name(const char *file_name) {
409 if (file_name != nullptr) {
410 strncpy(this->file_name, file_name, sizeof(this->file_name));
411 this->file_name[sizeof(this->file_name) - 1] = '\0';
412 } else {
413 this->file_name[0] = '\0';
414 }
415 }
416
417 const char *get_truncated_file_name() const { return file_name; }
418
419 void set_file_line(unsigned file_line) { this->file_line = file_line; }
420
421 unsigned get_file_line() const { return file_line; }
422
423 char *next_line() {
424 if (data_begin == data_end)
425 return nullptr;
426
427 ++file_line;
428
429 assert(data_begin < data_end);
430 assert(data_end <= block_len * 2);
431
432 if (data_begin >= block_len) {
433 std::memcpy(buffer.get(), buffer.get() + block_len, block_len);
434 data_begin -= block_len;
435 data_end -= block_len;
436 if (reader.is_valid()) {
437 data_end += reader.finish_read();
438 std::memcpy(buffer.get() + block_len, buffer.get() + 2 * block_len,
439 block_len);
440 reader.start_read(buffer.get() + 2 * block_len, block_len);
441 }
442 }
443
444 int line_end = data_begin;
445 while (line_end != data_end && buffer[line_end] != '\n') {
446 ++line_end;
447 }
448
449 if (line_end - data_begin + 1 > block_len) {
450 error::line_length_limit_exceeded err;
451 err.set_file_name(file_name);
452 err.set_file_line(file_line);
453 throw err;
454 }
455
456 if (line_end != data_end && buffer[line_end] == '\n') {
457 buffer[line_end] = '\0';
458 } else {
459 // some files are missing the newline at the end of the
460 // last line
461 ++data_end;
462 buffer[line_end] = '\0';
463 }
464
465 // handle windows \r\n-line breaks
466 if (line_end != data_begin && buffer[line_end - 1] == '\r')
467 buffer[line_end - 1] = '\0';
468
469 char *ret = buffer.get() + data_begin;
470 data_begin = line_end + 1;
471 return ret;
472 }
473};
474
475////////////////////////////////////////////////////////////////////////////
476// CSV //
477////////////////////////////////////////////////////////////////////////////
478
479namespace error {
// Capacity (excluding the terminator) of the column name kept inside error
// objects.
const int max_column_name_length = 63;

// Mixin that stores a possibly truncated, always NUL-terminated column name.
struct with_column_name {
  with_column_name() { std::memset(column_name, 0, sizeof(column_name)); }

  // Copies at most max_column_name_length characters; a null pointer
  // results in an empty name.
  void set_column_name(const char *column_name) {
    if (column_name == nullptr) {
      this->column_name[0] = '\0';
      return;
    }
    std::strncpy(this->column_name, column_name, max_column_name_length);
    this->column_name[max_column_name_length] = '\0';
  }

  char column_name[max_column_name_length + 1];
};
497
// Capacity (excluding the terminator) of the cell text kept inside error
// objects.
const int max_column_content_length = 63;

// Mixin that stores a possibly truncated, always NUL-terminated copy of the
// offending cell content.
struct with_column_content {
  with_column_content() {
    std::memset(column_content, 0, sizeof(column_content));
  }

  // Copies at most max_column_content_length characters; a null pointer
  // results in an empty string.
  void set_column_content(const char *column_content) {
    if (column_content == nullptr) {
      this->column_content[0] = '\0';
      return;
    }
    std::strncpy(this->column_content, column_content,
                 max_column_content_length);
    this->column_content[max_column_content_length] = '\0';
  }

  char column_content[max_column_content_length + 1];
};
517
518struct extra_column_in_header : base, with_file_name, with_column_name {
519 void format_error_message() const override {
520 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
521 R"(Extra column "%s" in header of file "%s".)", column_name,
522 file_name);
523 }
524};
525
526struct missing_column_in_header : base, with_file_name, with_column_name {
527 void format_error_message() const override {
528 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
529 R"(Missing column "%s" in header of file "%s".)", column_name,
530 file_name);
531 }
532};
533
534struct duplicated_column_in_header : base, with_file_name, with_column_name {
535 void format_error_message() const override {
536 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
537 R"(Duplicated column "%s" in header of file "%s".)",
538 column_name, file_name);
539 }
540};
541
542struct header_missing : base, with_file_name {
543 void format_error_message() const override {
544 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
545 "Header missing in file \"%s\".", file_name);
546 }
547};
548
549struct too_few_columns : base, with_file_name, with_file_line {
550 void format_error_message() const override {
551 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
552 "Too few columns in line %d in file \"%s\".", file_line,
553 file_name);
554 }
555};
556
557struct too_many_columns : base, with_file_name, with_file_line {
558 void format_error_message() const override {
559 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
560 "Too many columns in line %d in file \"%s\".", file_line,
561 file_name);
562 }
563};
564
565struct escaped_string_not_closed : base, with_file_name, with_file_line {
566 void format_error_message() const override {
567 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
568 "Escaped string was not closed in line %d in file \"%s\".",
569 file_line, file_name);
570 }
571};
572
573struct integer_must_be_positive : base,
574 with_file_name,
575 with_file_line,
576 with_column_name,
577 with_column_content {
578 void format_error_message() const override {
579 std::snprintf(
580 error_message_buffer, sizeof(error_message_buffer),
581 R"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)",
582 column_content, column_name, file_name, file_line);
583 }
584};
585
586struct no_digit : base,
587 with_file_name,
588 with_file_line,
589 with_column_name,
590 with_column_content {
591 void format_error_message() const override {
592 std::snprintf(
593 error_message_buffer, sizeof(error_message_buffer),
594 R"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)",
595 column_content, column_name, file_name, file_line);
596 }
597};
598
599struct integer_overflow : base,
600 with_file_name,
601 with_file_line,
602 with_column_name,
603 with_column_content {
604 void format_error_message() const override {
605 std::snprintf(
606 error_message_buffer, sizeof(error_message_buffer),
607 R"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)",
608 column_content, column_name, file_name, file_line);
609 }
610};
611
612struct integer_underflow : base,
613 with_file_name,
614 with_file_line,
615 with_column_name,
616 with_column_content {
617 void format_error_message() const override {
618 std::snprintf(
619 error_message_buffer, sizeof(error_message_buffer),
620 R"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)",
621 column_content, column_name, file_name, file_line);
622 }
623};
624
625struct invalid_single_character : base,
626 with_file_name,
627 with_file_line,
628 with_column_name,
629 with_column_content {
630 void format_error_message() const override {
631 std::snprintf(
632 error_message_buffer, sizeof(error_message_buffer),
633 R"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)",
634 column_content, column_name, file_name, file_line);
635 }
636};
637} // namespace error
638
// Bit flags controlling how the header parser treats columns that do not
// match the requested set; values can be combined and are tested with `&`.
using ignore_column = unsigned int;
static const ignore_column ignore_no_column = 0u;
static const ignore_column ignore_extra_column = 1u << 0;
static const ignore_column ignore_missing_column = 1u << 1;
643
// Trim policy that strips any of the characters in trim_char_list from both
// ends of a field.
template <char... trim_char_list> struct trim_chars {
private:
  // Recursion end: no candidate characters left, so c is not a trim char.
  constexpr static bool is_trim_char(char) { return false; }

  // True if c equals trim_char or any of the remaining candidates.
  template <class... OtherTrimChars>
  constexpr static bool is_trim_char(char c, char trim_char,
                                     OtherTrimChars... other_trim_chars) {
    return c == trim_char || is_trim_char(c, other_trim_chars...);
  }

public:
  // Narrows [str_begin, str_end) so that it neither starts nor ends with a
  // trim character, then writes a terminator at the new end. str_end must
  // therefore point at writable memory.
  static void trim(char *&str_begin, char *&str_end) {
    while (str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
      ++str_begin;
    while (str_begin != str_end &&
           is_trim_char(*(str_end - 1), trim_char_list...))
      --str_end;
    *str_end = '\0';
  }
};
664
// Comment policy that treats no line as a comment.
struct no_comment {
  static bool is_comment(const char * /*line*/) { return false; }
};
668
// Comment policy: a line is a comment when its first character is one of
// comment_start_char_list.
template <char... comment_start_char_list> struct single_line_comment {
private:
  // Recursion end: no candidate characters left.
  constexpr static bool is_comment_start_char(char) { return false; }

  // True if c equals comment_start_char or any of the remaining candidates.
  template <class... OtherCommentStartChars>
  constexpr static bool
  is_comment_start_char(char c, char comment_start_char,
                        OtherCommentStartChars... other_comment_start_chars) {
    return c == comment_start_char ||
           is_comment_start_char(c, other_comment_start_chars...);
  }

public:
  // Only the first character of line is inspected.
  static bool is_comment(const char *line) {
    return is_comment_start_char(*line, comment_start_char_list...);
  }
};
686
// Comment policy: a line is a comment when it is empty or consists only of
// spaces and tabs.
struct empty_line_comment {
  static bool is_comment(const char *line) {
    const char *cursor = line;
    // Skip leading blanks; the line is "empty" iff nothing else follows.
    while (*cursor == ' ' || *cursor == '\t')
      ++cursor;
    return *cursor == '\0';
  }
};
699
700template <char... comment_start_char_list>
701struct single_and_empty_line_comment {
702 static bool is_comment(const char *line) {
703 return single_line_comment<comment_start_char_list...>::is_comment(line) ||
704 empty_line_comment::is_comment(line);
705 }
706};
707
// Quote policy without any quoting: a field ends at the next separator or
// at the end of the line.
template <char sep> struct no_quote_escape {
  // Returns a pointer to the first `sep` or to the terminating NUL.
  static const char *find_next_column_end(const char *col_begin) {
    const char *cursor = col_begin;
    for (; *cursor != '\0' && *cursor != sep; ++cursor) {
    }
    return cursor;
  }

  // Nothing to unescape for unquoted fields.
  static void unescape(char *&, char *&) {}
};
717
718template <char sep, char quote> struct double_quote_escape {
719 static const char *find_next_column_end(const char *col_begin) {
720 while (*col_begin != sep && *col_begin != '\0')
721 if (*col_begin != quote)
722 ++col_begin;
723 else {
724 do {
725 ++col_begin;
726 while (*col_begin != quote) {
727 if (*col_begin == '\0')
728 throw error::escaped_string_not_closed();
729 ++col_begin;
730 }
731 ++col_begin;
732 } while (*col_begin == quote);
733 }
734 return col_begin;
735 }
736
737 static void unescape(char *&col_begin, char *&col_end) {
738 if (col_end - col_begin >= 2) {
739 if (*col_begin == quote && *(col_end - 1) == quote) {
740 ++col_begin;
741 --col_end;
742 char *out = col_begin;
743 for (char *in = col_begin; in != col_end; ++in) {
744 if (*in == quote && (in + 1) != col_end && *(in + 1) == quote) {
745 ++in;
746 }
747 *out = *in;
748 ++out;
749 }
750 col_end = out;
751 *col_end = '\0';
752 }
753 }
754 }
755};
756
757struct throw_on_overflow {
758 template <class T> static void on_overflow(T &) {
759 throw error::integer_overflow();
760 }
761
762 template <class T> static void on_underflow(T &) {
763 throw error::integer_underflow();
764 }
765};
766
// Overflow policy that does nothing: the value is left as it was when the
// overflow or underflow was detected.
struct ignore_overflow {
  template <class T> static void on_overflow(T & /*value*/) {}

  template <class T> static void on_underflow(T & /*value*/) {}
};
772
// Overflow policy that saturates: the value is set to the largest or
// smallest value of its type.
struct set_to_max_on_overflow {
  // The parentheses around std::numeric_limits<T>::max / ::min keep code
  // that includes windows.h, with its min and max macros, happy.
  template <class T> static void on_overflow(T &x) {
    const T largest = (std::numeric_limits<T>::max)();
    x = largest;
  }

  template <class T> static void on_underflow(T &x) {
    const T smallest = (std::numeric_limits<T>::min)();
    x = smallest;
  }
};
785
786namespace detail {
// Cuts the first field off `line`.
//
// On return [col_begin, col_end) is the field, terminated with NUL in
// place, and `line` points at the text after the separator, or is nullptr
// if this was the last field of the line.
template <class quote_policy>
void chop_next_column(char *&line, char *&col_begin, char *&col_end) {
  assert(line != nullptr);

  col_begin = line;
  const char *const field_end = quote_policy::find_next_column_end(col_begin);
  // Going through the offset turns the const pointer back into a mutable
  // one.
  col_end = col_begin + (field_end - col_begin);

  const bool is_last_field = (*col_end == '\0');
  if (is_last_field) {
    line = nullptr;
    return;
  }
  *col_end = '\0';
  line = col_end + 1;
}
803
804template <class trim_policy, class quote_policy>
805void parse_line(char *line, char **sorted_col,
806 const std::vector<int> &col_order) {
807 for (int i : col_order) {
808 if (line == nullptr)
809 throw ::io::error::too_few_columns();
810 char *col_begin, *col_end;
812
813 if (i != -1) {
814 trim_policy::trim(col_begin, col_end);
815 quote_policy::unescape(col_begin, col_end);
816
818 }
819 }
820 if (line != nullptr)
821 throw ::io::error::too_many_columns();
822}
823
824template <unsigned column_count, class trim_policy, class quote_policy>
825void parse_header_line(char *line, std::vector<int> &col_order,
826 const std::string *col_name,
827 ignore_column ignore_policy) {
828 col_order.clear();
829
830 bool found[column_count];
831 std::fill(found, found + column_count, false);
832 while (line) {
833 char *col_begin, *col_end;
835
836 trim_policy::trim(col_begin, col_end);
837 quote_policy::unescape(col_begin, col_end);
838
839 for (unsigned i = 0; i < column_count; ++i)
840 if (col_begin == col_name[i]) {
841 if (found[i]) {
842 error::duplicated_column_in_header err;
843 err.set_column_name(col_begin);
844 throw err;
845 }
846 found[i] = true;
847 col_order.push_back(i);
848 col_begin = 0;
849 break;
850 }
851 if (col_begin) {
852 if (ignore_policy & ::io::ignore_extra_column)
853 col_order.push_back(-1);
854 else {
855 error::extra_column_in_header err;
856 err.set_column_name(col_begin);
857 throw err;
858 }
859 }
860 }
861 if (!(ignore_policy & ::io::ignore_missing_column)) {
862 for (unsigned i = 0; i < column_count; ++i) {
863 if (!found[i]) {
864 error::missing_column_in_header err;
865 err.set_column_name(col_name[i].c_str());
866 throw err;
867 }
868 }
869 }
870}
871
872template <class overflow_policy> void parse(char *col, char &x) {
873 if (!*col)
874 throw error::invalid_single_character();
875 x = *col;
876 ++col;
877 if (*col)
878 throw error::invalid_single_character();
879}
880
// Copies the field text into a std::string.
template <class overflow_policy> void parse(char *col, std::string &x) {
  x.assign(col);
}

// Stores a pointer to the field text itself; nothing is copied.
template <class overflow_policy> void parse(char *col, const char *&x) {
  x = static_cast<const char *>(col);
}

// Stores a mutable pointer to the field text itself; nothing is copied.
template <class overflow_policy> void parse(char *col, char *&x) {
  x = col;
}
890
891template <class overflow_policy, class T>
892void parse_unsigned_integer(const char *col, T &x) {
893 x = 0;
894 while (*col != '\0') {
895 if ('0' <= *col && *col <= '9') {
896 T y = *col - '0';
897 if (x > ((std::numeric_limits<T>::max)() - y) / 10) {
898 overflow_policy::on_overflow(x);
899 return;
900 }
901 x = 10 * x + y;
902 } else
903 throw error::no_digit();
904 ++col;
905 }
906}
907
908template <class overflow_policy> void parse(char *col, unsigned char &x) {
910}
911template <class overflow_policy> void parse(char *col, unsigned short &x) {
913}
914template <class overflow_policy> void parse(char *col, unsigned int &x) {
916}
917template <class overflow_policy> void parse(char *col, unsigned long &x) {
919}
920template <class overflow_policy> void parse(char *col, unsigned long long &x) {
922}
923
924template <class overflow_policy, class T>
925void parse_signed_integer(const char *col, T &x) {
926 if (*col == '-') {
927 ++col;
928
929 x = 0;
930 while (*col != '\0') {
931 if ('0' <= *col && *col <= '9') {
932 T y = *col - '0';
933 if (x < ((std::numeric_limits<T>::min)() + y) / 10) {
934 overflow_policy::on_underflow(x);
935 return;
936 }
937 x = 10 * x - y;
938 } else
939 throw error::no_digit();
940 ++col;
941 }
942 return;
943 } else if (*col == '+')
944 ++col;
946}
947
948template <class overflow_policy> void parse(char *col, signed char &x) {
950}
951template <class overflow_policy> void parse(char *col, signed short &x) {
953}
954template <class overflow_policy> void parse(char *col, signed int &x) {
956}
957template <class overflow_policy> void parse(char *col, signed long &x) {
959}
960template <class overflow_policy> void parse(char *col, signed long long &x) {
962}
963
964template <class T> void parse_float(const char *col, T &x) {
965 bool is_neg = false;
966 if (*col == '-') {
967 is_neg = true;
968 ++col;
969 } else if (*col == '+')
970 ++col;
971
972 x = 0;
973 while ('0' <= *col && *col <= '9') {
974 int y = *col - '0';
975 x *= 10;
976 x += y;
977 ++col;
978 }
979
980 if (*col == '.' || *col == ',') {
981 ++col;
982 T pos = 1;
983 while ('0' <= *col && *col <= '9') {
984 pos /= 10;
985 int y = *col - '0';
986 ++col;
987 x += y * pos;
988 }
989 }
990
991 if (*col == 'e' || *col == 'E') {
992 ++col;
993 int e;
994
996
997 if (e != 0) {
998 T base;
999 if (e < 0) {
1000 base = T(0.1);
1001 e = -e;
1002 } else {
1003 base = T(10);
1004 }
1005
1006 while (e != 1) {
1007 if ((e & 1) == 0) {
1008 base = base * base;
1009 e >>= 1;
1010 } else {
1011 x *= base;
1012 --e;
1013 }
1014 }
1015 x *= base;
1016 }
1017 } else {
1018 if (*col != '\0')
1019 throw error::no_digit();
1020 }
1021
1022 if (is_neg)
1023 x = -x;
1024}
1025
1026template <class overflow_policy> void parse(char *col, float &x) {
1027 parse_float(col, x);
1028}
1029template <class overflow_policy> void parse(char *col, double &x) {
1030 parse_float(col, x);
1031}
1032template <class overflow_policy> void parse(char *col, long double &x) {
1033 parse_float(col, x);
1034}
1035
// Catch-all overload that is selected for types without a dedicated parse
// overload. Instantiating it always fails with a readable compile error.
template <class overflow_policy, class T> void parse(char *col, T &x) {
  // Reference both parameters so no unused-variable warning is emitted.
  (void)x;
  (void)col;
  // A plain "false" would be evaluated by GCC as soon as the template is
  // read; "sizeof(T) != sizeof(T)" depends on T and is therefore only
  // evaluated on instantiation.
  static_assert(sizeof(T) != sizeof(T),
                "Can not parse this type. Only builtin integrals, floats, "
                "char, char*, const char* and std::string are supported");
}
1047
1048} // namespace detail
1049
1050template <unsigned column_count, class trim_policy = trim_chars<' ', '\t'>,
1051 class quote_policy = no_quote_escape<','>,
1052 class overflow_policy = throw_on_overflow,
1053 class comment_policy = no_comment>
1054class CSVReader {
1055private:
1056 LineReader in;
1057
1058 char *row[column_count];
1059 std::string column_names[column_count];
1060
1061 std::vector<int> col_order;
1062
1063 template <class... ColNames>
1064 void set_column_names(std::string s, ColNames... cols) {
1065 column_names[column_count - sizeof...(ColNames) - 1] = std::move(s);
1066 set_column_names(std::forward<ColNames>(cols)...);
1067 }
1068
1069 void set_column_names() {}
1070
1071public:
1072 CSVReader() = delete;
1073 CSVReader(const CSVReader &) = delete;
1074 CSVReader &operator=(const CSVReader &);
1075
1076 template <class... Args>
1077 explicit CSVReader(Args &&... args) : in(std::forward<Args>(args)...) {
1078 std::fill(row, row + column_count, nullptr);
1079 col_order.resize(column_count);
1080 for (unsigned i = 0; i < column_count; ++i)
1081 col_order[i] = i;
1082 for (unsigned i = 1; i <= column_count; ++i)
1083 column_names[i - 1] = "col" + std::to_string(i);
1084 }
1085
1086 char *next_line() { return in.next_line(); }
1087
1088 template <class... ColNames>
1089 void read_header(ignore_column ignore_policy, ColNames... cols) {
1090 static_assert(sizeof...(ColNames) >= column_count,
1091 "not enough column names specified");
1092 static_assert(sizeof...(ColNames) <= column_count,
1093 "too many column names specified");
1094 try {
1095 set_column_names(std::forward<ColNames>(cols)...);
1096
1097 char *line;
1098 do {
1099 line = in.next_line();
1100 if (!line)
1101 throw error::header_missing();
1102 } while (comment_policy::is_comment(line));
1103
1104 detail::parse_header_line<column_count, trim_policy, quote_policy>(
1105 line, col_order, column_names, ignore_policy);
1106 } catch (error::with_file_name &err) {
1107 err.set_file_name(in.get_truncated_file_name());
1108 throw;
1109 }
1110 }
1111
1112 template <class... ColNames> void set_header(ColNames... cols) {
1113 static_assert(sizeof...(ColNames) >= column_count,
1114 "not enough column names specified");
1115 static_assert(sizeof...(ColNames) <= column_count,
1116 "too many column names specified");
1117 set_column_names(std::forward<ColNames>(cols)...);
1118 std::fill(row, row + column_count, nullptr);
1119 col_order.resize(column_count);
1120 for (unsigned i = 0; i < column_count; ++i)
1121 col_order[i] = i;
1122 }
1123
1124 bool has_column(const std::string &name) const {
1125 return col_order.end() !=
1126 std::find(col_order.begin(), col_order.end(),
1127 std::find(std::begin(column_names), std::end(column_names),
1128 name) -
1129 std::begin(column_names));
1130 }
1131
1132 void set_file_name(const std::string &file_name) {
1133 in.set_file_name(file_name);
1134 }
1135
1136 void set_file_name(const char *file_name) { in.set_file_name(file_name); }
1137
1138 const char *get_truncated_file_name() const {
1139 return in.get_truncated_file_name();
1140 }
1141
1142 void set_file_line(unsigned file_line) { in.set_file_line(file_line); }
1143
1144 unsigned get_file_line() const { return in.get_file_line(); }
1145
1146private:
1147 void parse_helper(std::size_t) {}
1148
1149 template <class T, class... ColType>
1150 void parse_helper(std::size_t r, T &t, ColType &... cols) {
1151 if (row[r]) {
1152 try {
1153 try {
1154 ::io::detail::parse<overflow_policy>(row[r], t);
1155 } catch (error::with_column_content &err) {
1156 err.set_column_content(row[r]);
1157 throw;
1158 }
1159 } catch (error::with_column_name &err) {
1160 err.set_column_name(column_names[r].c_str());
1161 throw;
1162 }
1163 }
1164 parse_helper(r + 1, cols...);
1165 }
1166
1167public:
1168 template <class... ColType> bool read_row(ColType &... cols) {
1169 static_assert(sizeof...(ColType) >= column_count,
1170 "not enough columns specified");
1171 static_assert(sizeof...(ColType) <= column_count,
1172 "too many columns specified");
1173 try {
1174 try {
1175
1176 char *line;
1177 do {
1178 line = in.next_line();
1179 if (!line)
1180 return false;
1181 } while (comment_policy::is_comment(line));
1182
1183 detail::parse_line<trim_policy, quote_policy>(line, row, col_order);
1184
1185 parse_helper(0, cols...);
1186 } catch (error::with_file_name &err) {
1187 err.set_file_name(in.get_truncated_file_name());
1188 throw;
1189 }
1190 } catch (error::with_file_line &err) {
1191 err.set_file_line(in.get_file_line());
1192 throw;
1193 }
1194
1195 return true;
1196 }
1197};
1198} // namespace io
1199#endif
1200
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
QAction * forward(const QObject *recvr, const char *slot, QObject *parent)
FeedPtr parse(const DocumentSource &src, const QString &formatHint=QString())
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri May 24 2024 11:49:21 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.