42#ifndef CSV_IO_NO_THREAD
43#include <condition_variable>
59struct base : std::exception {
60 virtual void format_error_message()
const = 0;
63 format_error_message();
64 return error_message_buffer;
67 mutable char error_message_buffer[2048];
// Upper bound on the number of file-name characters that are stored.
// Buffers holding a file name are declared with one extra byte
// (max_file_name_length + 1) so a terminating '\0' always fits.
constexpr int max_file_name_length = 1024;
73struct with_file_name {
74 with_file_name() { std::memset(file_name, 0,
sizeof(file_name)); }
76 void set_file_name(
const char *file_name) {
77 if (file_name !=
nullptr) {
80 (
strncpy(this->file_name, file_name,
sizeof(this->file_name)));
81 this->file_name[
sizeof(this->file_name) - 1] =
'\0';
83 this->file_name[0] =
'\0';
87 char file_name[max_file_name_length + 1];
90struct with_file_line {
91 with_file_line() { file_line = -1; }
93 void set_file_line(
int file_line) { this->file_line = file_line; }
99 with_errno() { errno_value = 0; }
101 void set_errno(
int errno_value) { this->errno_value = errno_value; }
106struct can_not_open_file : base, with_file_name, with_errno {
107 void format_error_message()
const override {
108 if (errno_value != 0)
109 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
110 "Can not open file \"%s\" because \"%s\".", file_name,
111 std::strerror(errno_value));
113 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
114 "Can not open file \"%s\".", file_name);
118struct line_length_limit_exceeded : base, with_file_name, with_file_line {
119 void format_error_message()
const override {
121 error_message_buffer,
sizeof(error_message_buffer),
122 "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1.",
123 file_line, file_name);
128class ByteSourceBase {
130 virtual int read(
char *buffer,
int size) = 0;
131 virtual ~ByteSourceBase() {}
136class OwningStdIOByteSourceBase :
public ByteSourceBase {
138 explicit OwningStdIOByteSourceBase(
FILE *file) : file(file) {
140 std::setvbuf(file, 0,
_IONBF, 0);
143 int read(
char *buffer,
int size) {
return std::fread(buffer, 1, size, file); }
145 ~OwningStdIOByteSourceBase() { std::fclose(file); }
151class NonOwningIStreamByteSource :
public ByteSourceBase {
153 explicit NonOwningIStreamByteSource(std::istream &in) : in(in) {}
155 int read(
char *buffer,
int size) {
156 in.read(buffer, size);
160 ~NonOwningIStreamByteSource() {}
166class NonOwningStringByteSource :
public ByteSourceBase {
168 NonOwningStringByteSource(
const char *str,
long long size)
169 : str(str), remaining_byte_count(size) {}
171 int read(
char *buffer,
int desired_byte_count) {
181 ~NonOwningStringByteSource() {}
185 long long remaining_byte_count;
188#ifndef CSV_IO_NO_THREAD
189class AsynchronousReader {
192 std::unique_lock<std::mutex>
guard(lock);
194 desired_byte_count = -1;
195 termination_requested =
false;
196 worker = std::thread([&] {
197 std::unique_lock<std::mutex>
guard(lock);
200 read_requested_condition.wait(
guard, [&] {
201 return desired_byte_count != -1 || termination_requested;
203 if (termination_requested)
206 read_byte_count = byte_source->read(buffer, desired_byte_count);
207 desired_byte_count = -1;
208 if (read_byte_count == 0)
210 read_finished_condition.notify_one();
213 read_error = std::current_exception();
215 read_finished_condition.notify_one();
219 bool is_valid()
const {
return byte_source !=
nullptr; }
222 std::unique_lock<std::mutex>
guard(lock);
225 read_byte_count = -1;
226 read_requested_condition.notify_one();
230 std::unique_lock<std::mutex>
guard(lock);
231 read_finished_condition.wait(
232 guard, [&] {
return read_byte_count != -1 || read_error; });
234 std::rethrow_exception(read_error);
236 return read_byte_count;
239 ~AsynchronousReader() {
240 if (byte_source !=
nullptr) {
242 std::unique_lock<std::mutex>
guard(lock);
243 termination_requested =
true;
245 read_requested_condition.notify_one();
251 std::unique_ptr<ByteSourceBase> byte_source;
255 bool termination_requested;
256 std::exception_ptr read_error;
258 int desired_byte_count;
262 std::condition_variable read_finished_condition;
263 std::condition_variable read_requested_condition;
267class SynchronousReader {
273 bool is_valid()
const {
return byte_source !=
nullptr; }
280 int finish_read() {
return byte_source->read(buffer, desired_byte_count); }
283 std::unique_ptr<ByteSourceBase> byte_source;
285 int desired_byte_count;
291 static const int block_len = 1 << 20;
292 std::unique_ptr<char[]> buffer;
294#ifdef CSV_IO_NO_THREAD
295 detail::SynchronousReader reader;
297 detail::AsynchronousReader reader;
302 char file_name[error::max_file_name_length + 1];
305 static std::unique_ptr<ByteSourceBase> open_file(
const char *file_name) {
308 FILE *file = std::fopen(file_name,
"rb");
312 error::can_not_open_file
err;
314 err.set_file_name(file_name);
317 return std::unique_ptr<ByteSourceBase>(
318 new detail::OwningStdIOByteSourceBase(file));
321 void init(std::unique_ptr<ByteSourceBase> byte_source) {
324 buffer = std::unique_ptr<char[]>(
new char[3 * block_len]);
326 data_end = byte_source->read(buffer.get(), 2 * block_len);
329 if (data_end >= 3 && buffer[0] ==
'\xEF' && buffer[1] ==
'\xBB' &&
333 if (data_end == 2 * block_len) {
334 reader.init(std::move(byte_source));
335 reader.start_read(buffer.get() + 2 * block_len, block_len);
340 LineReader() =
delete;
341 LineReader(
const LineReader &) =
delete;
342 LineReader &operator=(
const LineReader &) =
delete;
344 explicit LineReader(
const char *file_name) {
345 set_file_name(file_name);
346 init(open_file(file_name));
349 explicit LineReader(
const std::string &file_name) {
350 set_file_name(file_name.c_str());
351 init(open_file(file_name.c_str()));
354 LineReader(
const char *file_name,
355 std::unique_ptr<ByteSourceBase> byte_source) {
356 set_file_name(file_name);
357 init(std::move(byte_source));
360 LineReader(
const std::string &file_name,
361 std::unique_ptr<ByteSourceBase> byte_source) {
362 set_file_name(file_name.c_str());
363 init(std::move(byte_source));
366 LineReader(
const char *file_name,
const char *data_begin,
367 const char *data_end) {
368 set_file_name(file_name);
369 init(std::unique_ptr<ByteSourceBase>(
new detail::NonOwningStringByteSource(
370 data_begin, data_end - data_begin)));
373 LineReader(
const std::string &file_name,
const char *data_begin,
374 const char *data_end) {
375 set_file_name(file_name.c_str());
376 init(std::unique_ptr<ByteSourceBase>(
new detail::NonOwningStringByteSource(
377 data_begin, data_end - data_begin)));
380 LineReader(
const char *file_name,
FILE *file) {
381 set_file_name(file_name);
382 init(std::unique_ptr<ByteSourceBase>(
383 new detail::OwningStdIOByteSourceBase(file)));
386 LineReader(
const std::string &file_name,
FILE *file) {
387 set_file_name(file_name.c_str());
388 init(std::unique_ptr<ByteSourceBase>(
389 new detail::OwningStdIOByteSourceBase(file)));
392 LineReader(
const char *file_name, std::istream &in) {
393 set_file_name(file_name);
394 init(std::unique_ptr<ByteSourceBase>(
395 new detail::NonOwningIStreamByteSource(in)));
398 LineReader(
const std::string &file_name, std::istream &in) {
399 set_file_name(file_name.c_str());
400 init(std::unique_ptr<ByteSourceBase>(
401 new detail::NonOwningIStreamByteSource(in)));
404 void set_file_name(
const std::string &file_name) {
405 set_file_name(file_name.c_str());
408 void set_file_name(
const char *file_name) {
409 if (file_name !=
nullptr) {
410 strncpy(this->file_name, file_name,
sizeof(this->file_name));
411 this->file_name[
sizeof(this->file_name) - 1] =
'\0';
413 this->file_name[0] =
'\0';
417 const char *get_truncated_file_name()
const {
return file_name; }
419 void set_file_line(
unsigned file_line) { this->file_line = file_line; }
421 unsigned get_file_line()
const {
return file_line; }
424 if (data_begin == data_end)
429 assert(data_begin < data_end);
430 assert(data_end <= block_len * 2);
432 if (data_begin >= block_len) {
433 std::memcpy(buffer.get(), buffer.get() + block_len, block_len);
434 data_begin -= block_len;
435 data_end -= block_len;
436 if (reader.is_valid()) {
437 data_end += reader.finish_read();
438 std::memcpy(buffer.get() + block_len, buffer.get() + 2 * block_len,
440 reader.start_read(buffer.get() + 2 * block_len, block_len);
449 if (
line_end - data_begin + 1 > block_len) {
450 error::line_length_limit_exceeded
err;
451 err.set_file_name(file_name);
452 err.set_file_line(file_line);
469 char *
ret = buffer.get() + data_begin;
// Maximum number of column-name characters that are stored; the receiving
// buffer is sized max_column_name_length + 1 to leave room for the '\0'.
constexpr int max_column_name_length = 63;
481struct with_column_name {
483 std::memset(column_name, 0, max_column_name_length + 1);
486 void set_column_name(
const char *column_name) {
487 if (column_name !=
nullptr) {
488 std::strncpy(this->column_name, column_name, max_column_name_length);
489 this->column_name[max_column_name_length] =
'\0';
491 this->column_name[0] =
'\0';
495 char column_name[max_column_name_length + 1];
// Maximum number of column-content characters that are stored; the receiving
// buffer is sized max_column_content_length + 1 to leave room for the '\0'.
constexpr int max_column_content_length = 63;
500struct with_column_content {
501 with_column_content() {
502 std::memset(column_content, 0, max_column_content_length + 1);
505 void set_column_content(
const char *column_content) {
506 if (column_content !=
nullptr) {
507 std::strncpy(this->column_content, column_content,
508 max_column_content_length);
509 this->column_content[max_column_content_length] =
'\0';
511 this->column_content[0] =
'\0';
515 char column_content[max_column_content_length + 1];
518struct extra_column_in_header : base, with_file_name, with_column_name {
519 void format_error_message()
const override {
520 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
521 R
"(Extra column "%s" in header of file "%s".)", column_name,
526struct missing_column_in_header : base, with_file_name, with_column_name {
527 void format_error_message()
const override {
528 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
529 R
"(Missing column "%s" in header of file "%s".)", column_name,
534struct duplicated_column_in_header : base, with_file_name, with_column_name {
535 void format_error_message()
const override {
536 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
537 R
"(Duplicated column "%s" in header of file "%s".)",
538 column_name, file_name);
542struct header_missing : base, with_file_name {
543 void format_error_message()
const override {
544 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
545 "Header missing in file \"%s\".", file_name);
549struct too_few_columns : base, with_file_name, with_file_line {
550 void format_error_message()
const override {
551 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
552 "Too few columns in line %d in file \"%s\".", file_line,
557struct too_many_columns : base, with_file_name, with_file_line {
558 void format_error_message()
const override {
559 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
560 "Too many columns in line %d in file \"%s\".", file_line,
565struct escaped_string_not_closed : base, with_file_name, with_file_line {
566 void format_error_message()
const override {
567 std::snprintf(error_message_buffer,
sizeof(error_message_buffer),
568 "Escaped string was not closed in line %d in file \"%s\".",
569 file_line, file_name);
573struct integer_must_be_positive : base,
577 with_column_content {
578 void format_error_message()
const override {
580 error_message_buffer,
sizeof(error_message_buffer),
581 R
"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)",
582 column_content, column_name, file_name, file_line);
586struct no_digit : base,
590 with_column_content {
591 void format_error_message()
const override {
593 error_message_buffer,
sizeof(error_message_buffer),
594 R
"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)",
595 column_content, column_name, file_name, file_line);
599struct integer_overflow : base,
603 with_column_content {
604 void format_error_message()
const override {
606 error_message_buffer,
sizeof(error_message_buffer),
607 R
"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)",
608 column_content, column_name, file_name, file_line);
612struct integer_underflow : base,
616 with_column_content {
617 void format_error_message()
const override {
619 error_message_buffer,
sizeof(error_message_buffer),
620 R
"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)",
621 column_content, column_name, file_name, file_line);
625struct invalid_single_character : base,
629 with_column_content {
630 void format_error_message()
const override {
632 error_message_buffer,
sizeof(error_message_buffer),
633 R
"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)",
634 column_content, column_name, file_name, file_line);
// Column-ignore policy values. The two non-zero values occupy distinct bits
// (1 and 2), and ignore_no_column is the all-clear value.
// NOTE(review): presumably OR-ed together and tested as a bitmask when the
// header is read - confirm at the call site.
using ignore_column = unsigned int;
static const ignore_column ignore_no_column = 0u;
static const ignore_column ignore_extra_column = 1u << 0;
static const ignore_column ignore_missing_column = 1u << 1;
// Single-argument overload: with no candidate trim characters left to compare
// against, no character counts as a trim character, so this is always false.
// NOTE(review): looks like the terminating case of the multi-argument
// overload that follows - confirm.
static constexpr bool is_trim_char(char /*unused*/) {
  return false;
}
649 constexpr static bool is_trim_char(
char c,
char trim_char,
// Comment test that never matches: the line is not inspected and the result
// is always false.
static bool is_comment(const char * /*line*/) {
  return false;
}
671 constexpr static bool is_comment_start_char(
char) {
return false; }
674 constexpr static bool
682 static bool is_comment(
const char *line) {
687struct empty_line_comment {
688 static bool is_comment(
const char *line) {
691 while (*line ==
' ' || *line ==
'\t') {
701struct single_and_empty_line_comment {
702 static bool is_comment(
const char *line) {
703 return single_line_comment<comment_start_char_list...>::is_comment(line) ||
704 empty_line_comment::is_comment(line);
708template <
char sep>
struct no_quote_escape {
709 static const char *find_next_column_end(
const char *
col_begin) {
715 static void unescape(
char *&,
char *&) {}
718template <
char sep,
char quote>
struct double_quote_escape {
719 static const char *find_next_column_end(
const char *
col_begin) {
728 throw error::escaped_string_not_closed();
744 if (*in == quote && (in + 1) !=
col_end && *(in + 1) == quote) {
757struct throw_on_overflow {
758 template <
class T>
static void on_overflow(T &) {
759 throw error::integer_overflow();
762 template <
class T>
static void on_underflow(T &) {
763 throw error::integer_underflow();
767struct ignore_overflow {
768 template <
class T>
static void on_overflow(T &) {}
770 template <
class T>
static void on_underflow(T &) {}
773struct set_to_max_on_overflow {
774 template <
class T>
static void on_overflow(T &x) {
778 x = (std::numeric_limits<T>::max)();
781 template <
class T>
static void on_underflow(T &x) {
782 x = (std::numeric_limits<T>::min)();
787template <
class quote_policy>
804template <
class trim_policy,
class quote_policy>
806 const std::vector<int> &col_order) {
807 for (
int i : col_order) {
809 throw ::io::error::too_few_columns();
821 throw ::io::error::too_many_columns();
824template <
unsigned column_count,
class trim_policy,
class quote_policy>
825void parse_header_line(
char *line, std::vector<int> &col_order,
842 error::duplicated_column_in_header
err;
847 col_order.push_back(i);
853 col_order.push_back(-1);
855 error::extra_column_in_header
err;
864 error::missing_column_in_header
err;
872template <
class overflow_policy>
void parse(
char *
col,
char &x) {
874 throw error::invalid_single_character();
878 throw error::invalid_single_character();
881template <
class overflow_policy>
void parse(
char *
col, std::string &x) {
885template <
class overflow_policy>
void parse(
char *
col,
const char *&x) {
// parse() overload for mutable C-string targets.
// Stores the column pointer itself in `x`; nothing is copied, so `x` aliases
// the buffer that `col` points into. `overflow_policy` is not used here.
template <class overflow_policy>
void parse(char *col, char *&x) {
  x = col;
}
891template <
class overflow_policy,
class T>
892void parse_unsigned_integer(
const char *
col, T &x) {
894 while (*
col !=
'\0') {
895 if (
'0' <= *
col && *
col <=
'9') {
897 if (x > ((std::numeric_limits<T>::max)() - y) / 10) {
898 overflow_policy::on_overflow(x);
903 throw error::no_digit();
908template <
class overflow_policy>
void parse(
char *
col,
unsigned char &x) {
911template <
class overflow_policy>
void parse(
char *
col,
unsigned short &x) {
914template <
class overflow_policy>
void parse(
char *
col,
unsigned int &x) {
917template <
class overflow_policy>
void parse(
char *
col,
unsigned long &x) {
920template <
class overflow_policy>
void parse(
char *
col,
unsigned long long &x) {
924template <
class overflow_policy,
class T>
925void parse_signed_integer(
const char *
col, T &x) {
930 while (*
col !=
'\0') {
931 if (
'0' <= *
col && *
col <=
'9') {
933 if (x < ((std::numeric_limits<T>::min)() + y) / 10) {
934 overflow_policy::on_underflow(x);
939 throw error::no_digit();
943 }
else if (*
col ==
'+')
948template <
class overflow_policy>
void parse(
char *
col,
signed char &x) {
951template <
class overflow_policy>
void parse(
char *
col,
signed short &x) {
954template <
class overflow_policy>
void parse(
char *
col,
signed int &x) {
957template <
class overflow_policy>
void parse(
char *
col,
signed long &x) {
960template <
class overflow_policy>
void parse(
char *
col,
signed long long &x) {
964template <
class T>
void parse_float(
const char *
col, T &x) {
969 }
else if (*
col ==
'+')
973 while (
'0' <= *
col && *
col <=
'9') {
980 if (*
col ==
'.' || *
col ==
',') {
983 while (
'0' <= *
col && *
col <=
'9') {
991 if (*
col ==
'e' || *
col ==
'E') {
1019 throw error::no_digit();
1026template <
class overflow_policy>
void parse(
char *
col,
float &x) {
1027 parse_float(
col, x);
1029template <
class overflow_policy>
void parse(
char *
col,
double &x) {
1030 parse_float(
col, x);
1032template <
class overflow_policy>
void parse(
char *
col,
long double &x) {
1033 parse_float(
col, x);
1036template <
class overflow_policy,
class T>
void parse(
char *
col, T &x) {
1043 static_assert(
sizeof(T) !=
sizeof(T),
1044 "Can not parse this type. Only builtin integrals, floats, "
1045 "char, char*, const char* and std::string are supported");
1050template <
unsigned column_count,
class trim_policy = trim_
chars<' ', '\t'>,
1051 class quote_policy = no_quote_escape<','>,
1052 class overflow_policy = throw_on_overflow,
1053 class comment_policy = no_comment>
1061 std::vector<int> col_order;
1064 void set_column_names(std::string s,
ColNames...
cols) {
1065 column_names[
column_count -
sizeof...(ColNames) - 1] = std::move(s);
1066 set_column_names(std::forward<ColNames>(
cols)...);
1069 void set_column_names() {}
1072 CSVReader() =
delete;
1073 CSVReader(
const CSVReader &) =
delete;
1074 CSVReader &operator=(
const CSVReader &);
1076 template <
class...
Args>
1083 column_names[i - 1] =
"col" + std::to_string(i);
1086 char *next_line() {
return in.next_line(); }
1091 "not enough column names specified");
1093 "too many column names specified");
1095 set_column_names(std::forward<ColNames>(
cols)...);
1099 line = in.next_line();
1101 throw error::header_missing();
1102 }
while (comment_policy::is_comment(line));
1104 detail::parse_header_line<column_count, trim_policy, quote_policy>(
1106 }
catch (error::with_file_name &
err) {
1107 err.set_file_name(in.get_truncated_file_name());
1114 "not enough column names specified");
1116 "too many column names specified");
1117 set_column_names(std::forward<ColNames>(
cols)...);
1124 bool has_column(
const std::string &name)
const {
1125 return col_order.end() !=
1126 std::find(col_order.begin(), col_order.end(),
1127 std::find(std::begin(column_names), std::end(column_names),
1129 std::begin(column_names));
1132 void set_file_name(
const std::string &file_name) {
1133 in.set_file_name(file_name);
1136 void set_file_name(
const char *file_name) { in.set_file_name(file_name); }
1138 const char *get_truncated_file_name()
const {
1139 return in.get_truncated_file_name();
1142 void set_file_line(
unsigned file_line) { in.set_file_line(file_line); }
1144 unsigned get_file_line()
const {
return in.get_file_line(); }
1147 void parse_helper(std::size_t) {}
1149 template <
class T,
class...
ColType>
1150 void parse_helper(std::size_t r, T &t,
ColType &...
cols) {
1154 ::io::detail::parse<overflow_policy>(row[r], t);
1155 }
catch (error::with_column_content &
err) {
1156 err.set_column_content(row[r]);
1159 }
catch (error::with_column_name &
err) {
1160 err.set_column_name(column_names[r].
c_str());
1164 parse_helper(r + 1,
cols...);
1170 "not enough columns specified");
1172 "too many columns specified");
1178 line = in.next_line();
1181 }
while (comment_policy::is_comment(line));
1183 detail::parse_line<trim_policy, quote_policy>(line, row, col_order);
1185 parse_helper(0,
cols...);
1186 }
catch (error::with_file_name &
err) {
1187 err.set_file_name(in.get_truncated_file_name());
1190 }
catch (error::with_file_line &
err) {
1191 err.set_file_line(in.get_file_line());
KHEALTHCERTIFICATE_EXPORT QVariant parse(const QByteArray &data)
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
const QList< QKeySequence > & forward()