ext/fastcsv/fastcsv.c in fastcsv-0.0.4 vs ext/fastcsv/fastcsv.c in fastcsv-0.0.5

- old
+ new

@@ -22,10 +22,13 @@ } #define FREE \ if (buf != NULL) { \ free(buf); \ +} \ +if (row_sep != NULL) { \ + free(row_sep); \ } static VALUE cClass, cParser, eError; static ID s_read, s_row; @@ -33,23 +36,23 @@ typedef struct { char *start; } Data; -#line 152 "ext/fastcsv/fastcsv.rl" +#line 169 "ext/fastcsv/fastcsv.rl" -#line 43 "ext/fastcsv/fastcsv.c" -static const int raw_parse_start = 4; -static const int raw_parse_first_final = 4; +#line 46 "ext/fastcsv/fastcsv.c" +static const int raw_parse_start = 5; +static const int raw_parse_first_final = 5; static const int raw_parse_error = 0; -static const int raw_parse_en_main = 4; +static const int raw_parse_en_main = 5; -#line 155 "ext/fastcsv/fastcsv.rl" +#line 172 "ext/fastcsv/fastcsv.rl" // 16 kB #define BUFSIZE 16384 // @see http://rxr.whitequark.org/mri/source/io.c#4845 @@ -78,22 +81,22 @@ } } static VALUE raw_parse(int argc, VALUE *argv, VALUE self) { int cs, act, have = 0, curline = 1, io = 0; - char *ts = 0, *te = 0, *buf = 0, *eof = 0; + char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = 0; VALUE port, opts, r_encoding; VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil; - int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0; + int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0, len_row_sep = 0; rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL; Data *d; Data_Get_Struct(self, Data, d); VALUE option; - char quote_char = '"'; + char quote_char = '"', col_sep = ','; rb_scan_args(argc, argv, "11", &port, &opts); taint = OBJ_TAINTED(port); io = rb_respond_to(port, s_read); if (!io) { @@ -111,10 +114,26 @@ } else if (TYPE(opts) != T_HASH) { rb_raise(rb_eArgError, "options has to be a Hash or nil"); } + option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char"))); + if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) { + quote_char = *StringValueCStr(option); + } + else if (!NIL_P(option)) { + rb_raise(rb_eArgError, ":quote_char has to be a single character String"); + } + + option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep"))); + if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) { + col_sep = *StringValueCStr(option); + } + else if (!NIL_P(option)) { + rb_raise(rb_eArgError, ":col_sep has to be a single character String"); + } + // @see rb_io_extract_modeenc /* Set to defaults */ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0); // "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external). @@ -241,40 +260,42 @@ if (io) { buf = ALLOC_N(char, buffer_size); } -#line 247 "ext/fastcsv/fastcsv.c" +#line 266 "ext/fastcsv/fastcsv.c" { cs = raw_parse_start; ts = 0; te = 0; act = 0; } -#line 350 "ext/fastcsv/fastcsv.rl" +#line 383 "ext/fastcsv/fastcsv.rl" while (!done) { VALUE str; char *p, *pe; - int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff; + int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff, mark_row_sep_diff; if (io) { if (space == 0) { // Not moving d->start will cause intermittent segmentation faults. tokstart_diff = ts - buf; tokend_diff = te - buf; start_diff = d->start - buf; + mark_row_sep_diff = mark_row_sep - buf; buffer_size += BUFSIZE; REALLOC_N(buf, char, buffer_size); space = buffer_size - have; ts = buf + tokstart_diff; te = buf + tokend_diff; d->start = buf + start_diff; + mark_row_sep = buf + mark_row_sep_diff; } p = buf + have; // Reads "`length` bytes without any conversion (binary mode)." // "The resulted string is always ASCII-8BIT encoding." @@ -309,12 +330,13 @@ d->start = p; } pe = p + len; -#line 315 "ext/fastcsv/fastcsv.c" +#line 336 "ext/fastcsv/fastcsv.c" { + short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) { tr0: @@ -326,35 +348,85 @@ default: {{p = ((te))-1;}} break; } } - goto st4; + goto st5; tr5: -#line 46 "ext/fastcsv/fastcsv.rl" +#line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 92 "ext/fastcsv/fastcsv.rl" +#line 137 "ext/fastcsv/fastcsv.rl" { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } +#line 167 "ext/fastcsv/fastcsv.rl" + {te = p+1;} + goto st5; +tr6: +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 95 "ext/fastcsv/fastcsv.rl" + { rb_ary_push(row, field); field = Qnil; } -#line 148 "ext/fastcsv/fastcsv.rl" +#line 165 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st4; -tr9: -#line 120 "ext/fastcsv/fastcsv.rl" + goto st5; +tr7: +#line 49 "ext/fastcsv/fastcsv.rl" { - if (d->start == 0 || p == d->start) { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {te = p+1;} +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row rb_ivar_set(self, s_row, rb_str_new2("")); } else if (p > d->start) { rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); } @@ -365,82 +437,278 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } -#line 150 "ext/fastcsv/fastcsv.rl" + goto st5; +tr13: +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } +#line 167 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st4; -tr12: -#line 92 "ext/fastcsv/fastcsv.rl" + goto st5; +tr19: +#line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); field = Qnil; } -#line 148 "ext/fastcsv/fastcsv.rl" +#line 165 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st4; -tr15: -#line 150 "ext/fastcsv/fastcsv.rl" + goto st5; +tr20: +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {te = p+1;} +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } + goto st5; +tr42: +#line 167 "ext/fastcsv/fastcsv.rl" {te = p;p--;} - goto st4; -tr16: -#line 97 "ext/fastcsv/fastcsv.rl" + goto st5; +tr43: +#line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } } -#line 149 "ext/fastcsv/fastcsv.rl" - {te = p;p--;} - goto st4; -st4: #line 1 "NONE" + { switch( act ) { + case 0: + {{goto st0;}} + break; + default: + {{p = ((te))-1;}} + break; + } + } + goto st5; +tr50: +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } +#line 167 "ext/fastcsv/fastcsv.rl" + {te = p+1;} + goto st5; +tr56: +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {te = p+1;} +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } + goto st5; +st5: +#line 1 "NONE" {ts = 0;} #line 1 "NONE" {act = 0;} if ( ++p == pe ) - goto _test_eof4; -case 4: + goto _test_eof5; +case 5: #line 1 "NONE" {ts = p;} -#line 405 "ext/fastcsv/fastcsv.c" - switch( (*p) ) { - case 0: goto tr13; - case 10: goto tr3; - case 13: goto tr4; - case 34: goto tr14; - case 44: goto tr5; +#line 624 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + switch( _widec ) { + case 1280: goto tr33; + case 1290: goto tr3; + case 1293: goto tr4; + case 1536: goto tr35; + case 1546: goto tr36; + case 1549: goto tr37; + case 1792: goto tr7; + case 1802: goto tr8; + case 1805: goto tr9; + case 2048: goto tr39; + case 2058: goto tr40; + case 2061: goto tr41; } - goto st1; + if ( _widec < 1408 ) { + if ( 1152 <= _widec && _widec <= 1407 ) + goto st1; + } else if ( _widec > 1663 ) { + if ( _widec > 1919 ) { + if ( 1920 <= _widec && _widec <= 2175 ) + goto tr38; + } else if ( _widec >= 1664 ) + goto tr6; + } else + goto tr34; + goto st0; +st0: +cs = 0; + goto _out; st1: if ( ++p == pe ) goto _test_eof1; case 1: - switch( (*p) ) { - case 0: goto tr2; - case 10: goto tr3; - case 13: goto tr4; - case 34: goto tr0; - case 44: goto tr5; + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + switch( _widec ) { + case 1280: goto tr2; + case 1290: goto tr3; + case 1293: goto tr4; + case 1536: goto tr5; + case 1546: goto tr3; + case 1549: goto tr4; + case 1792: goto tr7; + case 1802: goto tr8; + case 1805: goto tr9; + case 2048: goto tr7; + case 2058: goto tr8; + case 2061: goto tr9; } - goto st1; + if ( _widec > 1407 ) { + if ( 1664 <= _widec && _widec <= 2175 ) + goto tr6; + } else if ( _widec >= 1152 ) + goto st1; + goto tr0; tr2: #line 1 "NONE" {te = p+1;} -#line 46 "ext/fastcsv/fastcsv.rl" +#line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 120 "ext/fastcsv/fastcsv.rl" +#line 137 "ext/fastcsv/fastcsv.rl" { - if (d->start == 0 || p == d->start) { + if (d->start == 0 || p == d->start) { // same as new_row rb_ivar_set(self, s_row, rb_str_new2("")); } else if (p > d->start) { rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); } @@ -451,40 +719,63 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } -#line 150 "ext/fastcsv/fastcsv.rl" +#line 167 "ext/fastcsv/fastcsv.rl" {act = 3;} - goto st5; -st5: + goto st6; +st6: if ( ++p == pe ) - goto _test_eof5; -case 5: -#line 464 "ext/fastcsv/fastcsv.c" - switch( (*p) ) { - case 0: goto tr2; - case 10: goto tr3; - case 13: goto tr4; - case 34: goto tr15; - case 44: goto tr5; + goto _test_eof6; +case 6: +#line 732 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + switch( _widec ) { + case 1280: goto tr2; + case 1290: goto tr3; + case 1293: goto tr4; + case 1536: goto tr5; + case 1546: goto tr3; + case 1549: goto tr4; + case 1792: goto tr7; + case 1802: goto tr8; + case 1805: goto tr9; + case 2048: goto tr7; + case 2058: goto tr8; + case 2061: goto tr9; } - goto st1; + if ( _widec > 1407 ) { + if ( 1664 <= _widec && _widec <= 2175 ) + goto tr6; + } else if ( _widec >= 1152 ) + goto st1; + goto tr42; tr3: -#line 46 "ext/fastcsv/fastcsv.rl" +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 101 "ext/fastcsv/fastcsv.rl" +#line 117 "ext/fastcsv/fastcsv.rl" { + mark_row_sep = p; curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -498,14 +789,37 @@ } rb_yield(row); row = rb_ary_new(); } - goto st6; -tr10: -#line 101 "ext/fastcsv/fastcsv.rl" +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st7; +tr8: +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -519,31 +833,97 @@ } rb_yield(row); row = rb_ary_new(); } - goto st6; -st6: + goto st7; +tr14: +#line 1 "NONE" + {te = p+1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st7; +tr21: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } + goto st7; +tr44: +#line 1 "NONE" + {te = p+1;} +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st7; +st7: if ( ++p == pe ) - goto _test_eof6; -case 6: -#line 530 "ext/fastcsv/fastcsv.c" - goto tr16; + goto _test_eof7; +case 7: +#line 907 "ext/fastcsv/fastcsv.c" + goto tr43; tr4: -#line 46 "ext/fastcsv/fastcsv.rl" +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 101 "ext/fastcsv/fastcsv.rl" +#line 117 "ext/fastcsv/fastcsv.rl" { + mark_row_sep = p; curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -557,14 +937,37 @@ } rb_yield(row); row = rb_ary_new(); } - goto st7; -tr11: -#line 101 "ext/fastcsv/fastcsv.rl" +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st8; +tr9: +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -578,36 +981,93 @@ } rb_yield(row); row = rb_ary_new(); } - goto st7; -st7: + goto st8; +tr15: +#line 1 "NONE" + {te = p+1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st8; +tr22: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } + goto st8; +st8: if ( ++p == pe ) - goto _test_eof7; -case 7: -#line 589 "ext/fastcsv/fastcsv.c" + goto _test_eof8; +case 8: +#line 1049 "ext/fastcsv/fastcsv.c" if ( (*p) == 10 ) - goto st6; - goto tr16; -tr13: + goto tr44; + goto tr43; +tr33: #line 1 "NONE" {te = p+1;} -#line 46 "ext/fastcsv/fastcsv.rl" +#line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 120 "ext/fastcsv/fastcsv.rl" +#line 137 "ext/fastcsv/fastcsv.rl" { - if (d->start == 0 || p == d->start) { + if (d->start == 0 || p == d->start) { // same as new_row rb_ivar_set(self, s_row, rb_str_new2("")); } else if (p > d->start) { rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); } @@ -618,46 +1078,85 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } -#line 150 "ext/fastcsv/fastcsv.rl" +#line 167 "ext/fastcsv/fastcsv.rl" {act = 3;} - goto st8; -st8: + goto st9; +st9: if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 631 "ext/fastcsv/fastcsv.c" - switch( (*p) ) { - case 10: goto tr15; - case 13: goto tr15; - case 34: goto tr15; - case 44: goto tr15; - } - goto st1; -tr14: -#line 38 "ext/fastcsv/fastcsv.rl" + goto _test_eof9; +case 9: +#line 1091 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + if ( _widec < 1291 ) { + if ( 1152 <= _widec && _widec <= 1289 ) + goto st1; + } else if ( _widec > 1292 ) { + if ( 1294 <= _widec && _widec <= 1407 ) + goto st1; + } else + goto st1; + goto tr42; +tr34: +#line 41 "ext/fastcsv/fastcsv.rl" { unclosed_line = curline; } goto st2; +tr45: +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st2; st2: if ( ++p == pe ) goto _test_eof2; case 2: -#line 649 "ext/fastcsv/fastcsv.c" - switch( (*p) ) { - case 0: goto st0; - case 34: goto tr8; +#line 1138 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + switch( _widec ) { + case 522: goto tr12; + case 525: goto tr12; } - goto st2; -st0: -cs = 0; - goto _out; -tr8: -#line 57 "ext/fastcsv/fastcsv.rl" + if ( _widec < 257 ) { + if ( 128 <= _widec && _widec <= 255 ) + goto st2; + } else if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto tr11; + } else + goto st2; + goto tr0; +tr11: +#line 60 "ext/fastcsv/fastcsv.rl" { if (p == ts) { field = rb_enc_str_new("", 0, encoding); ENCODE; } @@ -688,56 +1187,1898 @@ if (copy != NULL) { free(copy); } } } -#line 42 "ext/fastcsv/fastcsv.rl" +#line 45 "ext/fastcsv/fastcsv.rl" { unclosed_line = 0; } goto st3; +tr46: +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 703 "ext/fastcsv/fastcsv.c" - switch( (*p) ) { - case 0: goto tr9; - case 10: goto tr10; - case 13: goto tr11; - case 34: goto st2; - case 44: goto tr12; +#line 1260 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + switch( _widec ) { + case 1280: goto tr13; + case 1290: goto tr14; + case 1293: goto tr15; + case 1536: goto tr16; + case 1546: goto tr17; + case 1549: goto tr18; + case 1792: goto tr20; + case 1802: goto tr21; + case 1805: goto tr22; + case 2048: goto tr24; + case 2058: goto tr25; + case 2061: goto tr26; } - goto st0; + if ( _widec < 1664 ) { + if ( 1408 <= _widec && _widec <= 1663 ) + goto st2; + } else if ( _widec > 1919 ) { + if ( 1920 <= _widec && _widec <= 2175 ) + goto tr23; + } else + goto tr19; + goto tr0; +tr16: +#line 1 "NONE" + {te = p+1;} +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } +#line 167 "ext/fastcsv/fastcsv.rl" + {act = 3;} + goto st10; +tr23: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} + goto st10; +tr24: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } + goto st10; +tr35: +#line 1 "NONE" + {te = p+1;} +#line 41 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = curline; + } +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } +#line 167 "ext/fastcsv/fastcsv.rl" + {act = 3;} + goto st10; +tr38: +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 41 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = curline; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} + goto st10; +tr39: +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 41 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = curline; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } + goto st10; +tr55: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 1493 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + switch( _widec ) { + case 522: goto tr12; + case 525: goto tr12; } - _test_eof4: cs = 4; goto _test_eof; - _test_eof1: cs = 1; goto _test_eof; + if ( _widec < 257 ) { + if ( 128 <= _widec && _widec <= 255 ) + goto st2; + } else if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto tr11; + } else + goto st2; + goto tr0; +tr12: +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } + goto st4; +tr47: +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st4; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: +#line 1615 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + switch( _widec ) { + case 1280: goto tr13; + case 1290: goto tr17; + case 1293: goto tr18; + case 1536: goto tr27; + case 1546: goto tr28; + case 1549: goto tr28; + case 1792: goto tr20; + case 1802: goto tr25; + case 1805: goto tr26; + case 2048: goto tr30; + case 2058: goto tr31; + case 2061: goto tr31; + } + if ( _widec < 1408 ) { + if ( 1152 <= _widec && _widec <= 1407 ) + goto st2; + } else if ( _widec > 1663 ) { + if ( _widec > 1919 ) { + if ( 1920 <= _widec && _widec <= 2175 ) + goto tr29; + } else if ( _widec >= 1664 ) + goto tr23; + } else + goto tr12; + goto tr0; +tr17: +#line 1 "NONE" + {te = p+1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st11; +tr25: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } + goto st11; +tr36: +#line 1 "NONE" + {te = p+1;} +#line 41 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = curline; + } +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st11; +tr40: +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 41 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = curline; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } + goto st11; +tr48: +#line 1 "NONE" + {te = p+1;} +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st11; +tr51: +#line 1 "NONE" + {te = p+1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st11; +tr57: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st11; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: +#line 1912 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + switch( _widec ) { + case 256: goto tr43; + case 522: goto tr47; + case 525: goto tr47; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto tr46; + } else if ( _widec >= 128 ) + goto tr45; + goto tr0; +tr18: +#line 1 "NONE" + {te = p+1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st12; +tr26: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } + goto st12; +tr37: +#line 1 "NONE" + {te = p+1;} +#line 41 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = curline; + } +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st12; +tr41: +#line 1 "NONE" + {te = p+1;} +#line 49 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + // Unquoted empty fields are nil, not "", in Ruby. + field = Qnil; + } + else if (p > ts) { + field = rb_enc_str_new(ts, p - ts, encoding); + ENCODE; + } + } +#line 41 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = curline; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } + goto st12; +tr52: +#line 1 "NONE" + {te = p+1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st12; +tr58: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st12; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: +#line 2168 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + switch( _widec ) { + case 256: goto tr43; + case 266: goto tr48; + case 522: goto tr49; + case 525: goto tr47; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto tr46; + } else if ( _widec >= 128 ) + goto tr45; + goto tr0; +tr28: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st13; +tr31: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } + goto st13; +tr49: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st13; +tr54: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 166 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st13; +tr61: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 117 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + curline++; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st13; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: +#line 2556 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + switch( _widec ) { + case 1280: goto tr50; + case 1290: goto tr51; + case 1293: goto tr52; + case 1536: goto tr53; + case 1546: goto tr54; + case 1549: goto tr54; + case 1792: goto tr56; + case 1802: goto tr57; + case 1805: goto tr58; + case 2048: goto tr60; + case 2058: goto tr61; + case 2061: goto tr61; + } + if ( _widec < 1408 ) { + if ( 1152 <= _widec && _widec <= 1407 ) + goto tr45; + } else if ( _widec > 1663 ) { + if ( _widec > 1919 ) { + if ( 1920 <= _widec && _widec <= 2175 ) + goto tr59; + } else if ( _widec >= 1664 ) + goto tr55; + } else + goto tr47; + goto tr0; +tr27: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } +#line 167 "ext/fastcsv/fastcsv.rl" + {act = 3;} + goto st14; +tr29: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} + goto st14; +tr30: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } + goto st14; +tr53: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } +#line 167 "ext/fastcsv/fastcsv.rl" + {act = 3;} + goto st14; +tr59: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } + goto st14; +tr60: +#line 1 "NONE" + {te = p+1;} +#line 60 "ext/fastcsv/fastcsv.rl" + { + if (p == ts) { + field = rb_enc_str_new("", 0, encoding); + ENCODE; + } + // @note If we add an action on '""', we can skip some steps if no '""' is found. + else if (p > ts) { + // Operating on ts in-place produces odd behavior, FYI. + char *copy = ALLOC_N(char, p - ts); + memcpy(copy, ts, p - ts); + + char *reader = ts, *writer = copy; + int escaped = 0; + + while (p > reader) { + if (*reader == quote_char && !escaped) { + // Skip the escaping character. + escaped = 1; + } + else { + escaped = 0; + *writer++ = *reader; + } + reader++; + } + + field = rb_enc_str_new(copy, writer - copy, encoding); + ENCODE; + + if (copy != NULL) { + free(copy); + } + } + } +#line 45 "ext/fastcsv/fastcsv.rl" + { + unclosed_line = 0; + } +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 165 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + } +#line 137 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } + goto st14; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: +#line 3004 "ext/fastcsv/fastcsv.c" + _widec = (*p); + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 155 "ext/fastcsv/fastcsv.rl" + (*p) == quote_char ) _widec += 256; + if ( +#line 156 "ext/fastcsv/fastcsv.rl" + (*p) == col_sep ) _widec += 512; + switch( _widec ) { + case 1280: goto tr13; + case 1290: goto tr17; + case 1293: goto tr18; + case 1536: goto tr27; + case 1546: goto tr28; + case 1549: goto tr28; + case 1792: goto tr20; + case 1802: goto tr25; + case 1805: goto tr26; + case 2048: goto tr30; + case 2058: goto tr31; + case 2061: goto tr31; + } + if ( _widec < 1408 ) { + if ( 1152 <= _widec && _widec <= 1407 ) + goto st2; + } else if ( _widec > 1663 ) { + if ( _widec > 1919 ) { + if ( 1920 <= _widec && _widec <= 2175 ) + goto tr29; + } else if ( _widec >= 1664 ) + goto tr23; + } else + goto tr12; + goto tr0; + } _test_eof5: cs = 5; goto _test_eof; + _test_eof1: cs = 1; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; _test_eof: {} if ( p == eof ) { switch ( cs ) { case 1: goto tr0; - case 5: goto tr15; - case 6: goto tr16; - case 7: goto tr16; - case 8: goto tr15; + case 6: goto tr42; + case 7: goto tr43; + case 8: goto tr43; + case 9: goto tr42; + case 2: goto tr0; + case 3: goto tr0; + case 10: goto tr0; + case 4: goto tr0; + case 11: goto tr43; + case 12: goto tr43; + case 13: goto tr43; + case 14: goto tr0; } } _out: {} } -#line 409 "ext/fastcsv/fastcsv.rl" +#line 444 "ext/fastcsv/fastcsv.rl" if (done && cs < raw_parse_first_final) { - if (d->start == 0 || p == d->start) { + if (d->start == 0 || p == d->start) { // same as new_row rb_ivar_set(self, s_row, rb_str_new2("")); } else if (p > d->start) { rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); }