ext/fastcsv/fastcsv.c in fastcsv-0.0.6 vs ext/fastcsv/fastcsv.c in fastcsv-0.0.7

- old
+ new

@@ -41,15 +41,15 @@ #line 172 "ext/fastcsv/fastcsv.rl" #line 46 "ext/fastcsv/fastcsv.c" -static const int raw_parse_start = 5; -static const int raw_parse_first_final = 5; +static const int raw_parse_start = 4; +static const int raw_parse_first_final = 4; static const int raw_parse_error = 0; -static const int raw_parse_en_main = 5; +static const int raw_parse_en_main = 4; #line 175 "ext/fastcsv/fastcsv.rl" // 16 kB @@ -130,11 +130,11 @@ } else if (!NIL_P(option)) { rb_raise(rb_eArgError, ":col_sep has to be a single character String"); } - // @see rb_io_extract_modeenc + // @see rb_io_extract_modeenc parse_mode_enc /* Set to defaults */ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2, 0); // "enc" (internal) or "enc2:enc" (external:internal) or "enc:-" (external). // We don't support binmode, which would force "ASCII-8BIT", or "BOM|UTF-*". @@ -348,11 +348,11 @@ default: {{p = ((te))-1;}} break; } } - goto st5; + goto st4; tr5: #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. @@ -380,11 +380,11 @@ rb_yield(row); } } #line 170 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st5; + goto st4; tr6: #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. @@ -400,11 +400,11 @@ rb_ary_push(row, field); field = Qnil; } #line 168 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st5; + goto st4; tr7: #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. @@ -437,12 +437,12 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } - goto st5; -tr13: + goto st4; +tr12: #line 138 "ext/fastcsv/fastcsv.rl" { if (d->start == 0 || p == d->start) { // same as new_row rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -458,21 +458,21 @@ rb_yield(row); } } #line 170 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st5; -tr19: + goto st4; +tr18: #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); field = Qnil; } #line 168 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st5; -tr20: + goto st4; +tr19: #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); field = Qnil; } @@ -493,16 +493,16 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } - goto st5; -tr42: + goto st4; +tr36: #line 170 "ext/fastcsv/fastcsv.rl" {te = p;p--;} - goto st5; -tr43: + goto st4; +tr37: #line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; if (len_row_sep) { @@ -528,12 +528,12 @@ default: {{p = ((te))-1;}} break; } } - goto st5; -tr50: + goto st4; +tr43: #line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; if (len_row_sep) { @@ -549,10 +549,37 @@ memcpy(row_sep, mark_row_sep, len_row_sep); } curline++; } +#line 169 "ext/fastcsv/fastcsv.rl" + {te = p;p--;} + goto st4; +tr44: +#line 169 "ext/fastcsv/fastcsv.rl" + {te = p;p--;} + goto st4; +tr45: +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + + curline++; + } #line 138 "ext/fastcsv/fastcsv.rl" { if (d->start == 0 || p == d->start) { // same as new_row rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -568,12 +595,12 @@ rb_yield(row); } } #line 170 "ext/fastcsv/fastcsv.rl" {te = p+1;} - goto st5; -tr56: + goto st4; +tr51: #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); field = Qnil; } @@ -596,10 +623,38 @@ memcpy(row_sep, mark_row_sep, len_row_sep); } curline++; } + goto st4; +tr52: +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 168 "ext/fastcsv/fastcsv.rl" + {te = p+1;} +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + + curline++; + } #line 138 "ext/fastcsv/fastcsv.rl" { if (d->start == 0 || p == d->start) { // same as new_row rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -613,55 +668,55 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } - goto st5; -st5: + goto st4; +st4: #line 1 "NONE" {ts = 0;} #line 1 "NONE" {act = 0;} if ( ++p == pe ) - goto _test_eof5; -case 5: + goto _test_eof4; +case 4: #line 1 "NONE" {ts = p;} -#line 630 "ext/fastcsv/fastcsv.c" +#line 685 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(1152 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; if ( #line 157 "ext/fastcsv/fastcsv.rl" (*p) == col_sep ) _widec += 512; switch( _widec ) { - case 1280: goto tr33; + case 1280: goto tr27; case 1290: goto tr3; case 1293: goto tr4; - case 1536: goto tr35; - case 1546: goto tr36; - case 1549: goto tr37; + case 1536: goto tr29; + case 1546: goto tr30; + case 1549: goto tr31; case 1792: goto tr7; case 1802: goto tr8; case 1805: goto tr9; - case 2048: goto tr39; - case 2058: goto tr40; - case 2061: goto tr41; + case 2048: goto tr33; + case 2058: goto tr34; + case 2061: goto tr35; } if ( _widec < 1408 ) { if ( 1152 <= _widec && _widec <= 1407 ) goto st1; } else if ( _widec > 1663 ) { if ( _widec > 1919 ) { if ( 1920 <= _widec && _widec <= 2175 ) - goto tr38; + goto tr32; } else if ( _widec >= 1664 ) goto tr6; } else - goto tr34; + goto tr28; goto st0; st0: cs = 0; goto _out; st1: @@ -727,16 +782,16 @@ rb_yield(row); } } #line 170 "ext/fastcsv/fastcsv.rl" {act = 3;} - goto st6; -st6: + goto st5; +st5: if ( ++p == pe ) - goto _test_eof6; -case 6: -#line 738 "ext/fastcsv/fastcsv.c" + goto _test_eof5; +case 5: +#line 793 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(1152 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; @@ -760,11 +815,11 @@ if ( _widec > 1407 ) { if ( 1664 <= _widec && _widec <= 2175 ) goto tr6; } else if ( _widec >= 1152 ) goto st1; - goto tr42; + goto tr36; tr3: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { @@ -796,11 +851,11 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st7; + goto st6; tr8: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { @@ -837,12 +892,12 @@ } rb_yield(row); row = rb_ary_new(); } - goto st7; -tr14: + goto st6; +tr13: #line 1 "NONE" {te = p+1;} #line 119 "ext/fastcsv/fastcsv.rl" { mark_row_sep = p; @@ -862,12 +917,12 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st7; -tr21: + goto st6; +tr20: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); @@ -892,23 +947,116 @@ } rb_yield(row); row = rb_ary_new(); } - goto st7; -tr44: + goto st6; +tr38: #line 1 "NONE" {te = p+1;} #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st7; -st7: + goto st6; +tr46: +#line 1 "NONE" + {te = p+1;} +#line 119 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + + curline++; + } +#line 169 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st6; +tr53: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 168 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 119 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + + curline++; + } + goto st6; +st6: if ( ++p == pe ) - goto _test_eof7; -case 7: -#line 909 "ext/fastcsv/fastcsv.c" - goto tr43; + goto _test_eof6; +case 6: +#line 1057 "ext/fastcsv/fastcsv.c" + goto tr37; tr4: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { @@ -940,11 +1088,11 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st8; + goto st7; tr9: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { @@ -981,12 +1129,12 @@ } rb_yield(row); row = rb_ary_new(); } - goto st8; -tr15: + goto st7; +tr14: #line 1 "NONE" {te = p+1;} #line 119 "ext/fastcsv/fastcsv.rl" { mark_row_sep = p; @@ -1006,12 +1154,12 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st8; -tr22: + goto st7; +tr21: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); @@ -1036,20 +1184,113 @@ } rb_yield(row); row = rb_ary_new(); } - goto st8; -st8: + goto st7; +tr47: +#line 1 "NONE" + {te = p+1;} +#line 119 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + + curline++; + } +#line 169 "ext/fastcsv/fastcsv.rl" + {act = 2;} + goto st7; +tr54: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" + { + rb_ary_push(row, field); + field = Qnil; + } +#line 168 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 119 "ext/fastcsv/fastcsv.rl" + { + mark_row_sep = p; + + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field + rb_ary_push(row, field); + field = Qnil; + } + + rb_yield(row); + row = rb_ary_new(); + } +#line 100 "ext/fastcsv/fastcsv.rl" + { + d->start = p; + + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; + + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); + } + } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + + curline++; + } + goto st7; +st7: if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 1047 "ext/fastcsv/fastcsv.c" + goto _test_eof7; +case 7: +#line 1288 "ext/fastcsv/fastcsv.c" if ( (*p) == 10 ) - goto tr44; - goto tr43; -tr33: + goto tr38; + goto tr37; +tr27: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { @@ -1078,16 +1319,16 @@ rb_yield(row); } } #line 170 "ext/fastcsv/fastcsv.rl" {act = 3;} - goto st9; -st9: + goto st8; +st8: if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 1089 "ext/fastcsv/fastcsv.c" + goto _test_eof8; +case 8: +#line 1330 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(1152 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; @@ -1100,18 +1341,18 @@ } else if ( _widec > 1292 ) { if ( 1294 <= _widec && _widec <= 1407 ) goto st1; } else goto st1; - goto tr42; -tr34: + goto tr36; +tr28: #line 41 "ext/fastcsv/fastcsv.rl" { unclosed_line = curline; } goto st2; -tr45: +tr39: #line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; if (len_row_sep) { @@ -1132,20 +1373,16 @@ goto st2; st2: if ( ++p == pe ) goto _test_eof2; case 2: -#line 1138 "ext/fastcsv/fastcsv.c" +#line 1379 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(128 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; - switch( _widec ) { - case 522: goto tr12; - case 525: goto tr12; - } if ( _widec < 257 ) { if ( 128 <= _widec && _widec <= 255 ) goto st2; } else if ( _widec > 383 ) { if ( 384 <= _widec && _widec <= 639 ) @@ -1192,11 +1429,11 @@ #line 45 "ext/fastcsv/fastcsv.rl" { unclosed_line = 0; } goto st3; -tr46: +tr40: #line 60 "ext/fastcsv/fastcsv.rl" { if (p == ts) { field = rb_enc_str_new("", 0, encoding); ENCODE; @@ -1256,43 +1493,43 @@ goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1262 "ext/fastcsv/fastcsv.c" +#line 1499 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(1152 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; if ( #line 157 "ext/fastcsv/fastcsv.rl" (*p) == col_sep ) _widec += 512; switch( _widec ) { - case 1280: goto tr13; - case 1290: goto tr14; - case 1293: goto tr15; - case 1536: goto tr16; - case 1546: goto tr17; - case 1549: goto tr18; - case 1792: goto tr20; - case 1802: goto tr21; - case 1805: goto tr22; - case 2048: goto tr24; - case 2058: goto tr25; - case 2061: goto tr26; + case 1280: goto tr12; + case 1290: goto tr13; + case 1293: goto tr14; + case 1536: goto tr15; + case 1546: goto tr16; + case 1549: goto tr17; + case 1792: goto tr19; + case 1802: goto tr20; + case 1805: goto tr21; + case 2048: goto tr23; + case 2058: goto tr24; + case 2061: goto tr25; } if ( _widec < 1664 ) { if ( 1408 <= _widec && _widec <= 1663 ) goto st2; } else if ( _widec > 1919 ) { if ( 1920 <= _widec && _widec <= 2175 ) - goto tr23; + goto tr22; } else - goto tr19; + goto tr18; goto tr0; -tr16: +tr15: #line 1 "NONE" {te = p+1;} #line 138 "ext/fastcsv/fastcsv.rl" { if (d->start == 0 || p == d->start) { // same as new_row @@ -1310,23 +1547,23 @@ rb_yield(row); } } #line 170 "ext/fastcsv/fastcsv.rl" {act = 3;} - goto st10; -tr23: + goto st9; +tr22: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); field = Qnil; } #line 168 "ext/fastcsv/fastcsv.rl" {act = 1;} - goto st10; -tr24: + goto st9; +tr23: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); @@ -1349,12 +1586,12 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } - goto st10; -tr35: + goto st9; +tr29: #line 1 "NONE" {te = p+1;} #line 41 "ext/fastcsv/fastcsv.rl" { unclosed_line = curline; @@ -1387,12 +1624,12 @@ rb_yield(row); } } #line 170 "ext/fastcsv/fastcsv.rl" {act = 3;} - goto st10; -tr38: + goto st9; +tr32: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { @@ -1413,12 +1650,12 @@ rb_ary_push(row, field); field = Qnil; } #line 168 "ext/fastcsv/fastcsv.rl" {act = 1;} - goto st10; -tr39: + goto st9; +tr33: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { @@ -1456,21 +1693,14 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } - goto st10; -tr55: + goto st9; +tr48: #line 1 "NONE" {te = p+1;} -#line 95 "ext/fastcsv/fastcsv.rl" - { - rb_ary_push(row, field); - field = Qnil; - } -#line 168 "ext/fastcsv/fastcsv.rl" - {act = 1;} #line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; if (len_row_sep) { @@ -1486,115 +1716,70 @@ memcpy(row_sep, mark_row_sep, len_row_sep); } curline++; } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 1497 "ext/fastcsv/fastcsv.c" - _widec = (*p); - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 156 "ext/fastcsv/fastcsv.rl" - (*p) == quote_char ) _widec += 256; - switch( _widec ) { - case 522: goto tr12; - case 525: goto tr12; - } - if ( _widec < 257 ) { - if ( 128 <= _widec && _widec <= 255 ) - goto st2; - } else if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto tr11; - } else - goto st2; - goto tr0; -tr12: -#line 60 "ext/fastcsv/fastcsv.rl" +#line 138 "ext/fastcsv/fastcsv.rl" { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } - char *reader = ts, *writer = copy; - int escaped = 0; + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } + if (RARRAY_LEN(row)) { + rb_yield(row); } } -#line 45 "ext/fastcsv/fastcsv.rl" +#line 170 "ext/fastcsv/fastcsv.rl" + {act = 3;} + goto st9; +tr55: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" { - unclosed_line = 0; + rb_ary_push(row, field); + field = Qnil; } - goto st4; -tr47: -#line 60 "ext/fastcsv/fastcsv.rl" +#line 168 "ext/fastcsv/fastcsv.rl" + {act = 1;} +#line 100 "ext/fastcsv/fastcsv.rl" { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); + d->start = p; - char *reader = ts, *writer = copy; - int escaped = 0; + if (len_row_sep) { + if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { + FREE; - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; + rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } } + else { + len_row_sep = p - mark_row_sep; + row_sep = ALLOC_N(char, len_row_sep); + memcpy(row_sep, mark_row_sep, len_row_sep); + } + + curline++; } -#line 45 "ext/fastcsv/fastcsv.rl" + goto st9; +tr56: +#line 1 "NONE" + {te = p+1;} +#line 95 "ext/fastcsv/fastcsv.rl" { - unclosed_line = 0; + rb_ary_push(row, field); + field = Qnil; } +#line 168 "ext/fastcsv/fastcsv.rl" + {act = 1;} #line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; if (len_row_sep) { @@ -1610,51 +1795,48 @@ memcpy(row_sep, mark_row_sep, len_row_sep); } curline++; } - goto st4; -st4: +#line 138 "ext/fastcsv/fastcsv.rl" + { + if (d->start == 0 || p == d->start) { // same as new_row + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + + if (!NIL_P(field) || RARRAY_LEN(row)) { + rb_ary_push(row, field); + } + + if (RARRAY_LEN(row)) { + rb_yield(row); + } + } + goto st9; +st9: if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1621 "ext/fastcsv/fastcsv.c" + goto _test_eof9; +case 9: +#line 1823 "ext/fastcsv/fastcsv.c" _widec = (*p); - _widec = (short)(1152 + ((*p) - -128)); + _widec = (short)(128 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; - if ( -#line 157 "ext/fastcsv/fastcsv.rl" - (*p) == col_sep ) _widec += 512; - switch( _widec ) { - case 1280: goto tr13; - case 1290: goto tr17; - case 1293: goto tr18; - case 1536: goto tr27; - case 1546: goto tr28; - case 1549: goto tr28; - case 1792: goto tr20; - case 1802: goto tr25; - case 1805: goto tr26; - case 2048: goto tr30; - case 2058: goto tr31; - case 2061: goto tr31; - } - if ( _widec < 1408 ) { - if ( 1152 <= _widec && _widec <= 1407 ) + if ( _widec < 257 ) { + if ( 128 <= _widec && _widec <= 255 ) goto st2; - } else if ( _widec > 1663 ) { - if ( _widec > 1919 ) { - if ( 1920 <= _widec && _widec <= 2175 ) - goto tr29; - } else if ( _widec >= 1664 ) - goto tr23; + } else if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto tr11; } else - goto tr12; + goto st2; goto tr0; -tr17: +tr16: #line 1 "NONE" {te = p+1;} #line 119 "ext/fastcsv/fastcsv.rl" { mark_row_sep = p; @@ -1674,12 +1856,12 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st11; -tr25: + goto st10; +tr24: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); @@ -1704,12 +1886,12 @@ } rb_yield(row); row = rb_ary_new(); } - goto st11; -tr36: + goto st10; +tr30: #line 1 "NONE" {te = p+1;} #line 41 "ext/fastcsv/fastcsv.rl" { unclosed_line = curline; @@ -1744,12 +1926,12 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st11; -tr40: + goto st10; +tr34: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { @@ -1789,12 +1971,12 @@ } rb_yield(row); row = rb_ary_new(); } - goto st11; -tr48: + goto st10; +tr41: #line 1 "NONE" {te = p+1;} #line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; @@ -1814,12 +1996,12 @@ curline++; } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st11; -tr51: + goto st10; +tr49: #line 1 "NONE" {te = p+1;} #line 119 "ext/fastcsv/fastcsv.rl" { mark_row_sep = p; @@ -1858,11 +2040,11 @@ curline++; } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st11; + goto st10; tr57: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { @@ -1907,33 +2089,30 @@ memcpy(row_sep, mark_row_sep, len_row_sep); } curline++; } - goto st11; -st11: + goto st10; +st10: if ( ++p == pe ) - goto _test_eof11; -case 11: -#line 1918 "ext/fastcsv/fastcsv.c" + goto _test_eof10; +case 10: +#line 2100 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(128 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; - switch( _widec ) { - case 256: goto tr43; - case 522: goto tr47; - case 525: goto tr47; - } + if ( _widec == 256 ) + goto tr37; if ( _widec > 383 ) { if ( 384 <= _widec && _widec <= 639 ) - goto tr46; + goto tr40; } else if ( _widec >= 128 ) - goto tr45; + goto tr39; goto tr0; -tr18: +tr17: #line 1 "NONE" {te = p+1;} #line 119 "ext/fastcsv/fastcsv.rl" { mark_row_sep = p; @@ -1953,12 +2132,12 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st12; -tr26: + goto st11; +tr25: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); @@ -1983,12 +2162,12 @@ } rb_yield(row); row = rb_ary_new(); } - goto st12; -tr37: + goto st11; +tr31: #line 1 "NONE" {te = p+1;} #line 41 "ext/fastcsv/fastcsv.rl" { unclosed_line = curline; @@ -2023,12 +2202,12 @@ rb_yield(row); row = rb_ary_new(); } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st12; -tr41: + goto st11; +tr35: #line 1 "NONE" {te = p+1;} #line 49 "ext/fastcsv/fastcsv.rl" { if (p == ts) { @@ -2068,12 +2247,12 @@ } rb_yield(row); row = rb_ary_new(); } - goto st12; -tr52: + goto st11; +tr50: #line 1 "NONE" {te = p+1;} #line 119 "ext/fastcsv/fastcsv.rl" { mark_row_sep = p; @@ -2112,11 +2291,11 @@ curline++; } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st12; + goto st11; tr58: #line 1 "NONE" {te = p+1;} #line 95 "ext/fastcsv/fastcsv.rl" { @@ -2161,34 +2340,33 @@ memcpy(row_sep, mark_row_sep, len_row_sep); } curline++; } - goto st12; -st12: + goto st11; +st11: if ( ++p == pe ) - goto _test_eof12; -case 12: -#line 2172 "ext/fastcsv/fastcsv.c" + goto _test_eof11; +case 11: +#line 2351 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(128 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; switch( _widec ) { - case 256: goto tr43; - case 266: goto tr48; - case 522: goto tr49; - case 525: goto tr47; + case 256: goto tr37; + case 266: goto tr41; + case 522: goto tr42; } if ( _widec > 383 ) { if ( 384 <= _widec && _widec <= 639 ) - goto tr46; + goto tr40; } else if ( _widec >= 128 ) - goto tr45; + goto tr39; goto tr0; -tr28: +tr42: #line 1 "NONE" {te = p+1;} #line 60 "ext/fastcsv/fastcsv.rl" { if (p == ts) { @@ -2226,143 +2404,10 @@ } #line 45 "ext/fastcsv/fastcsv.rl" { unclosed_line = 0; } -#line 119 "ext/fastcsv/fastcsv.rl" - { - mark_row_sep = p; - - if (d->start == 0 || p == d->start) { - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field - rb_ary_push(row, field); - field = Qnil; - } - - rb_yield(row); - row = rb_ary_new(); - } -#line 169 "ext/fastcsv/fastcsv.rl" - {act = 2;} - goto st13; -tr31: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 95 "ext/fastcsv/fastcsv.rl" - { - rb_ary_push(row, field); - field = Qnil; - } -#line 168 "ext/fastcsv/fastcsv.rl" - {act = 1;} -#line 119 "ext/fastcsv/fastcsv.rl" - { - mark_row_sep = p; - - if (d->start == 0 || p == d->start) { - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field - rb_ary_push(row, field); - field = Qnil; - } - - rb_yield(row); - row = rb_ary_new(); - } - goto st13; -tr49: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } #line 100 "ext/fastcsv/fastcsv.rl" { d->start = p; if (len_row_sep) { @@ -2380,708 +2425,79 @@ curline++; } #line 169 "ext/fastcsv/fastcsv.rl" {act = 2;} - goto st13; -tr54: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 119 "ext/fastcsv/fastcsv.rl" - { - mark_row_sep = p; - - if (d->start == 0 || p == d->start) { - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field - rb_ary_push(row, field); - field = Qnil; - } - - rb_yield(row); - row = rb_ary_new(); - } -#line 100 "ext/fastcsv/fastcsv.rl" - { - d->start = p; - - if (len_row_sep) { - if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { - FREE; - - rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); - } - } - else { - len_row_sep = p - mark_row_sep; - row_sep = ALLOC_N(char, len_row_sep); - memcpy(row_sep, mark_row_sep, len_row_sep); - } - - curline++; - } -#line 169 "ext/fastcsv/fastcsv.rl" - {act = 2;} - goto st13; -tr61: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 95 "ext/fastcsv/fastcsv.rl" - { - rb_ary_push(row, field); - field = Qnil; - } -#line 168 "ext/fastcsv/fastcsv.rl" - {act = 1;} -#line 119 "ext/fastcsv/fastcsv.rl" - { - mark_row_sep = p; - - if (d->start == 0 || p == d->start) { - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field - rb_ary_push(row, field); - field = Qnil; - } - - rb_yield(row); - row = rb_ary_new(); - } -#line 100 "ext/fastcsv/fastcsv.rl" - { - d->start = p; - - if (len_row_sep) { - if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { - FREE; - - rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); - } - } - else { - len_row_sep = p - mark_row_sep; - row_sep = ALLOC_N(char, len_row_sep); - memcpy(row_sep, mark_row_sep, len_row_sep); - } - - curline++; - } - goto st13; -st13: + goto st12; +st12: if ( ++p == pe ) - goto _test_eof13; -case 13: -#line 2562 "ext/fastcsv/fastcsv.c" + goto _test_eof12; +case 12: +#line 2436 "ext/fastcsv/fastcsv.c" _widec = (*p); _widec = (short)(1152 + ((*p) - -128)); if ( #line 156 "ext/fastcsv/fastcsv.rl" (*p) == quote_char ) _widec += 256; if ( #line 157 "ext/fastcsv/fastcsv.rl" (*p) == col_sep ) _widec += 512; switch( _widec ) { - case 1280: goto tr50; - case 1290: goto tr51; - case 1293: goto tr52; - case 1536: goto tr53; - case 1546: goto tr54; - case 1549: goto tr54; - case 1792: goto tr56; - case 1802: goto tr57; - case 1805: goto tr58; - case 2048: goto tr60; - case 2058: goto tr61; - case 2061: goto tr61; + case 1280: goto tr45; + case 1290: goto tr46; + case 1293: goto tr47; + case 1536: goto tr48; + case 1546: goto tr49; + case 1549: goto tr50; + case 1792: goto tr52; + case 1802: goto tr53; + case 1805: goto tr54; + case 2048: goto tr56; + case 2058: goto tr57; + case 2061: goto tr58; } if ( _widec < 1408 ) { if ( 1152 <= _widec && _widec <= 1407 ) - goto tr45; + goto tr43; } else if ( _widec > 1663 ) { if ( _widec > 1919 ) { if ( 1920 <= _widec && _widec <= 2175 ) - goto tr59; + goto tr55; } else if ( _widec >= 1664 ) - goto tr55; + goto tr51; } else - goto tr47; - goto tr0; -tr27: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 138 "ext/fastcsv/fastcsv.rl" - { - if (d->start == 0 || p == d->start) { // same as new_row - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { - rb_ary_push(row, field); - } - - if (RARRAY_LEN(row)) { - rb_yield(row); - } - } -#line 170 "ext/fastcsv/fastcsv.rl" - {act = 3;} - goto st14; -tr29: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 95 "ext/fastcsv/fastcsv.rl" - { - rb_ary_push(row, field); - field = Qnil; - } -#line 168 "ext/fastcsv/fastcsv.rl" - {act = 1;} - goto st14; -tr30: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 95 "ext/fastcsv/fastcsv.rl" - { - rb_ary_push(row, field); - field = Qnil; - } -#line 168 "ext/fastcsv/fastcsv.rl" - {act = 1;} -#line 138 "ext/fastcsv/fastcsv.rl" - { - if (d->start == 0 || p == d->start) { // same as new_row - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { - rb_ary_push(row, field); - } - - if (RARRAY_LEN(row)) { - rb_yield(row); - } - } - goto st14; -tr53: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 100 "ext/fastcsv/fastcsv.rl" - { - d->start = p; - - if (len_row_sep) { - if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { - FREE; - - rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); - } - } - else { - len_row_sep = p - mark_row_sep; - row_sep = ALLOC_N(char, len_row_sep); - memcpy(row_sep, mark_row_sep, len_row_sep); - } - - curline++; - } -#line 138 "ext/fastcsv/fastcsv.rl" - { - if (d->start == 0 || p == d->start) { // same as new_row - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { - rb_ary_push(row, field); - } - - if (RARRAY_LEN(row)) { - rb_yield(row); - } - } -#line 170 "ext/fastcsv/fastcsv.rl" - {act = 3;} - goto st14; -tr59: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 95 "ext/fastcsv/fastcsv.rl" - { - rb_ary_push(row, field); - field = Qnil; - } -#line 168 "ext/fastcsv/fastcsv.rl" - {act = 1;} -#line 100 "ext/fastcsv/fastcsv.rl" - { - d->start = p; - - if (len_row_sep) { - if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { - FREE; - - rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); - } - } - else { - len_row_sep = p - mark_row_sep; - row_sep = ALLOC_N(char, len_row_sep); - memcpy(row_sep, mark_row_sep, len_row_sep); - } - - curline++; - } - goto st14; -tr60: -#line 1 "NONE" - {te = p+1;} -#line 60 "ext/fastcsv/fastcsv.rl" - { - if (p == ts) { - field = rb_enc_str_new("", 0, encoding); - ENCODE; - } - // @note If we add an action on '""', we can skip some steps if no '""' is found. - else if (p > ts) { - // Operating on ts in-place produces odd behavior, FYI. - char *copy = ALLOC_N(char, p - ts); - memcpy(copy, ts, p - ts); - - char *reader = ts, *writer = copy; - int escaped = 0; - - while (p > reader) { - if (*reader == quote_char && !escaped) { - // Skip the escaping character. - escaped = 1; - } - else { - escaped = 0; - *writer++ = *reader; - } - reader++; - } - - field = rb_enc_str_new(copy, writer - copy, encoding); - ENCODE; - - if (copy != NULL) { - free(copy); - } - } - } -#line 45 "ext/fastcsv/fastcsv.rl" - { - unclosed_line = 0; - } -#line 95 "ext/fastcsv/fastcsv.rl" - { - rb_ary_push(row, field); - field = Qnil; - } -#line 168 "ext/fastcsv/fastcsv.rl" - {act = 1;} -#line 100 "ext/fastcsv/fastcsv.rl" - { - d->start = p; - - if (len_row_sep) { - if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || (len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1))) { - FREE; - - rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline); - } - } - else { - len_row_sep = p - mark_row_sep; - row_sep = ALLOC_N(char, len_row_sep); - memcpy(row_sep, mark_row_sep, len_row_sep); - } - - curline++; - } -#line 138 "ext/fastcsv/fastcsv.rl" - { - if (d->start == 0 || p == d->start) { // same as new_row - rb_ivar_set(self, s_row, rb_str_new2("")); - } - else if (p > d->start) { - rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); - } - - if (!NIL_P(field) || RARRAY_LEN(row)) { - rb_ary_push(row, field); - } - - if (RARRAY_LEN(row)) { - rb_yield(row); - } - } - goto st14; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: -#line 3016 "ext/fastcsv/fastcsv.c" - _widec = (*p); - _widec = (short)(1152 + ((*p) - -128)); - if ( -#line 156 "ext/fastcsv/fastcsv.rl" - (*p) == quote_char ) _widec += 256; - if ( -#line 157 "ext/fastcsv/fastcsv.rl" - (*p) == col_sep ) _widec += 512; - switch( _widec ) { - case 1280: goto tr13; - case 1290: goto tr17; - case 1293: goto tr18; - case 1536: goto tr27; - case 1546: goto tr28; - case 1549: goto tr28; - case 1792: goto tr20; - case 1802: goto tr25; - case 1805: goto tr26; - case 2048: goto tr30; - case 2058: goto tr31; - case 2061: goto tr31; + goto tr39; + goto tr44; } - if ( _widec < 1408 ) { - if ( 1152 <= _widec && _widec <= 1407 ) - goto st2; - } else if ( _widec > 1663 ) { - if ( _widec > 1919 ) { - if ( 1920 <= _widec && _widec <= 2175 ) - goto tr29; - } else if ( _widec >= 1664 ) - goto tr23; - } else - goto tr12; - goto tr0; - } - _test_eof5: cs = 5; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; _test_eof1: cs = 1; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; _test_eof: {} if ( p == eof ) { switch ( cs ) { case 1: goto tr0; - case 6: goto tr42; - case 7: goto tr43; - case 8: goto tr43; - case 9: goto tr42; + case 5: goto tr36; + case 6: goto tr37; + case 7: goto tr37; + case 8: goto tr36; case 2: goto tr0; case 3: goto tr0; - case 10: goto tr0; - case 4: goto tr0; - case 11: goto tr43; + case 9: goto tr0; + case 10: goto tr37; + case 11: goto tr37; case 12: goto tr43; - case 13: goto tr43; - case 14: goto tr0; } } _out: {} } @@ -3114,10 +2530,10 @@ memmove(buf, ts, have); // @see https://github.com/hpricot/hpricot/blob/master/ext/hpricot_scan/hpricot_scan.rl#L92 if (d->start > ts) { d->start = buf + (d->start - ts); } - if (mark_row_sep > ts) { + if (mark_row_sep >= ts) { mark_row_sep = buf + (mark_row_sep - ts); } te = buf + (te - ts); ts = buf; }