ext/fastcsv/fastcsv.c in fastcsv-0.0.3 vs ext/fastcsv/fastcsv.c in fastcsv-0.0.4

- old
+ new

@@ -22,13 +22,10 @@ } #define FREE \ if (buf != NULL) { \ free(buf); \ -} \ -if (row_sep != NULL) { \ - free(row_sep); \ } static VALUE cClass, cParser, eError; static ID s_read, s_row; @@ -36,23 +33,23 @@ typedef struct { char *start; } Data; -#line 170 "ext/fastcsv/fastcsv.rl" +#line 152 "ext/fastcsv/fastcsv.rl" -#line 46 "ext/fastcsv/fastcsv.c" +#line 43 "ext/fastcsv/fastcsv.c" static const int raw_parse_start = 4; static const int raw_parse_first_final = 4; static const int raw_parse_error = 0; static const int raw_parse_en_main = 4; -#line 173 "ext/fastcsv/fastcsv.rl" +#line 155 "ext/fastcsv/fastcsv.rl" // 16 kB #define BUFSIZE 16384 // @see http://rxr.whitequark.org/mri/source/io.c#4845 @@ -81,15 +78,15 @@ } } static VALUE raw_parse(int argc, VALUE *argv, VALUE self) { int cs, act, have = 0, curline = 1, io = 0; - char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = NULL; + char *ts = 0, *te = 0, *buf = 0, *eof = 0; VALUE port, opts, r_encoding; VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil; - int done = 0, unclosed_line = 0, len_row_sep = 0, buffer_size = 0, taint = 0; + int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0; rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL; Data *d; Data_Get_Struct(self, Data, d); @@ -223,10 +220,12 @@ } else if (!NIL_P(r_encoding)) { encoding = rb_enc_get(r_encoding); } + // In case #raw_parse is called multiple times on the same parser. Note that + // using IO methods on a re-used parser can cause segmentation faults. rb_ivar_set(self, s_row, Qnil); buffer_size = BUFSIZE; if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) { bufsize = rb_ivar_get(self, rb_intern("@buffer_size")); @@ -242,42 +241,40 @@ if (io) { buf = ALLOC_N(char, buffer_size); } -#line 248 "ext/fastcsv/fastcsv.c" +#line 247 "ext/fastcsv/fastcsv.c" { cs = raw_parse_start; ts = 0; te = 0; act = 0; } -#line 366 "ext/fastcsv/fastcsv.rl" +#line 350 "ext/fastcsv/fastcsv.rl" while (!done) { VALUE str; char *p, *pe; - int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff, mark_row_sep_diff; + int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff; if (io) { if (space == 0) { // Not moving d->start will cause intermittent segmentation faults. tokstart_diff = ts - buf; tokend_diff = te - buf; start_diff = d->start - buf; - mark_row_sep_diff = mark_row_sep - buf; buffer_size += BUFSIZE; REALLOC_N(buf, char, buffer_size); space = buffer_size - have; ts = buf + tokstart_diff; te = buf + tokend_diff; d->start = buf + start_diff; - mark_row_sep = buf + mark_row_sep_diff; } p = buf + have; // Reads "`length` bytes without any conversion (binary mode)." // "The resulted string is always ASCII-8BIT encoding." @@ -312,11 +309,11 @@ d->start = p; } pe = p + len; -#line 318 "ext/fastcsv/fastcsv.c" +#line 315 "ext/fastcsv/fastcsv.c" { if ( p == pe ) goto _test_eof; switch ( cs ) { @@ -331,31 +328,31 @@ break; } } goto st4; tr5: -#line 49 "ext/fastcsv/fastcsv.rl" +#line 46 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 95 "ext/fastcsv/fastcsv.rl" +#line 92 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); field = Qnil; } -#line 166 "ext/fastcsv/fastcsv.rl" +#line 148 "ext/fastcsv/fastcsv.rl" {te = p+1;} goto st4; tr9: -#line 138 "ext/fastcsv/fastcsv.rl" +#line 120 "ext/fastcsv/fastcsv.rl" { if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } else if (p > d->start) { @@ -368,45 +365,32 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } -#line 168 "ext/fastcsv/fastcsv.rl" +#line 150 "ext/fastcsv/fastcsv.rl" {te = p+1;} goto st4; tr12: -#line 95 "ext/fastcsv/fastcsv.rl" +#line 92 "ext/fastcsv/fastcsv.rl" { rb_ary_push(row, field); field = Qnil; } -#line 166 "ext/fastcsv/fastcsv.rl" +#line 148 "ext/fastcsv/fastcsv.rl" {te = p+1;} goto st4; tr15: -#line 168 "ext/fastcsv/fastcsv.rl" +#line 150 "ext/fastcsv/fastcsv.rl" {te = p;p--;} goto st4; tr16: -#line 100 "ext/fastcsv/fastcsv.rl" +#line 97 "ext/fastcsv/fastcsv.rl" { d->start = p; - - if (len_row_sep) { - if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1)) { - FREE; - - rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1); - } - } - else { - len_row_sep = p - mark_row_sep; - row_sep = ALLOC_N(char, p - mark_row_sep); - memcpy(row_sep, mark_row_sep, p - mark_row_sep); - } } -#line 167 "ext/fastcsv/fastcsv.rl" +#line 149 "ext/fastcsv/fastcsv.rl" {te = p;p--;} goto st4; st4: #line 1 "NONE" {ts = 0;} @@ -415,11 +399,11 @@ if ( ++p == pe ) goto _test_eof4; case 4: #line 1 "NONE" {ts = p;} -#line 421 "ext/fastcsv/fastcsv.c" +#line 405 "ext/fastcsv/fastcsv.c" switch( (*p) ) { case 0: goto tr13; case 10: goto tr3; case 13: goto tr4; case 34: goto tr14; @@ -439,22 +423,22 @@ } goto st1; tr2: #line 1 "NONE" {te = p+1;} -#line 49 "ext/fastcsv/fastcsv.rl" +#line 46 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 138 "ext/fastcsv/fastcsv.rl" +#line 120 "ext/fastcsv/fastcsv.rl" { if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } else if (p > d->start) { @@ -467,42 +451,40 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } -#line 168 "ext/fastcsv/fastcsv.rl" +#line 150 "ext/fastcsv/fastcsv.rl" {act = 3;} goto st5; st5: if ( ++p == pe ) goto _test_eof5; case 5: -#line 480 "ext/fastcsv/fastcsv.c" +#line 464 "ext/fastcsv/fastcsv.c" switch( (*p) ) { case 0: goto tr2; case 10: goto tr3; case 13: goto tr4; case 34: goto tr15; case 44: goto tr5; } goto st1; tr3: -#line 49 "ext/fastcsv/fastcsv.rl" +#line 46 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 117 "ext/fastcsv/fastcsv.rl" +#line 101 "ext/fastcsv/fastcsv.rl" { - mark_row_sep = p; - curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -518,14 +500,12 @@ rb_yield(row); row = rb_ary_new(); } goto st6; tr10: -#line 117 "ext/fastcsv/fastcsv.rl" +#line 101 "ext/fastcsv/fastcsv.rl" { - mark_row_sep = p; - curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -544,28 +524,26 @@ goto st6; st6: if ( ++p == pe ) goto _test_eof6; case 6: -#line 550 "ext/fastcsv/fastcsv.c" +#line 530 "ext/fastcsv/fastcsv.c" goto tr16; tr4: -#line 49 "ext/fastcsv/fastcsv.rl" +#line 46 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 117 "ext/fastcsv/fastcsv.rl" +#line 101 "ext/fastcsv/fastcsv.rl" { - mark_row_sep = p; - curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -581,14 +559,12 @@ rb_yield(row); row = rb_ary_new(); } goto st7; tr11: -#line 117 "ext/fastcsv/fastcsv.rl" +#line 101 "ext/fastcsv/fastcsv.rl" { - mark_row_sep = p; - curline++; if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } @@ -607,29 +583,29 @@ goto st7; st7: if ( ++p == pe ) goto _test_eof7; case 7: -#line 613 "ext/fastcsv/fastcsv.c" +#line 589 "ext/fastcsv/fastcsv.c" if ( (*p) == 10 ) goto st6; goto tr16; tr13: #line 1 "NONE" {te = p+1;} -#line 49 "ext/fastcsv/fastcsv.rl" +#line 46 "ext/fastcsv/fastcsv.rl" { if (p == ts) { // Unquoted empty fields are nil, not "", in Ruby. field = Qnil; } else if (p > ts) { field = rb_enc_str_new(ts, p - ts, encoding); ENCODE; } } -#line 138 "ext/fastcsv/fastcsv.rl" +#line 120 "ext/fastcsv/fastcsv.rl" { if (d->start == 0 || p == d->start) { rb_ivar_set(self, s_row, rb_str_new2("")); } else if (p > d->start) { @@ -642,46 +618,46 @@ if (RARRAY_LEN(row)) { rb_yield(row); } } -#line 168 "ext/fastcsv/fastcsv.rl" +#line 150 "ext/fastcsv/fastcsv.rl" {act = 3;} goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 655 "ext/fastcsv/fastcsv.c" +#line 631 "ext/fastcsv/fastcsv.c" switch( (*p) ) { case 10: goto tr15; case 13: goto tr15; case 34: goto tr15; case 44: goto tr15; } goto st1; tr14: -#line 41 "ext/fastcsv/fastcsv.rl" +#line 38 "ext/fastcsv/fastcsv.rl" { unclosed_line = curline; } goto st2; st2: if ( ++p == pe ) goto _test_eof2; case 2: -#line 673 "ext/fastcsv/fastcsv.c" +#line 649 "ext/fastcsv/fastcsv.c" switch( (*p) ) { case 0: goto st0; case 34: goto tr8; } goto st2; st0: cs = 0; goto _out; tr8: -#line 60 "ext/fastcsv/fastcsv.rl" +#line 57 "ext/fastcsv/fastcsv.rl" { if (p == ts) { field = rb_enc_str_new("", 0, encoding); ENCODE; } @@ -712,20 +688,20 @@ if (copy != NULL) { free(copy); } } } -#line 45 "ext/fastcsv/fastcsv.rl" +#line 42 "ext/fastcsv/fastcsv.rl" { unclosed_line = 0; } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 727 "ext/fastcsv/fastcsv.c" +#line 703 "ext/fastcsv/fastcsv.c" switch( (*p) ) { case 0: goto tr9; case 10: goto tr10; case 13: goto tr11; case 34: goto st2; @@ -755,12 +731,19 @@ } _out: {} } -#line 427 "ext/fastcsv/fastcsv.rl" +#line 409 "ext/fastcsv/fastcsv.rl" if (done && cs < raw_parse_first_final) { + if (d->start == 0 || p == d->start) { + rb_ivar_set(self, s_row, rb_str_new2("")); + } + else if (p > d->start) { + rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start)); + } + FREE; if (unclosed_line) { rb_raise(eError, "Unclosed quoted field on line %d.", unclosed_line); }