ext/fastcsv/fastcsv.c in fastcsv-0.0.3 vs ext/fastcsv/fastcsv.c in fastcsv-0.0.4
- old
+ new
@@ -22,13 +22,10 @@
}
#define FREE \
if (buf != NULL) { \
free(buf); \
-} \
-if (row_sep != NULL) { \
- free(row_sep); \
}
static VALUE cClass, cParser, eError;
static ID s_read, s_row;
@@ -36,23 +33,23 @@
typedef struct {
char *start;
} Data;
-#line 170 "ext/fastcsv/fastcsv.rl"
+#line 152 "ext/fastcsv/fastcsv.rl"
-#line 46 "ext/fastcsv/fastcsv.c"
+#line 43 "ext/fastcsv/fastcsv.c"
static const int raw_parse_start = 4;
static const int raw_parse_first_final = 4;
static const int raw_parse_error = 0;
static const int raw_parse_en_main = 4;
-#line 173 "ext/fastcsv/fastcsv.rl"
+#line 155 "ext/fastcsv/fastcsv.rl"
// 16 kB
#define BUFSIZE 16384
// @see http://rxr.whitequark.org/mri/source/io.c#4845
@@ -81,15 +78,15 @@
}
}
static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
int cs, act, have = 0, curline = 1, io = 0;
- char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = NULL;
+ char *ts = 0, *te = 0, *buf = 0, *eof = 0;
VALUE port, opts, r_encoding;
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
- int done = 0, unclosed_line = 0, len_row_sep = 0, buffer_size = 0, taint = 0;
+ int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
rb_encoding *enc = NULL, *enc2 = NULL, *encoding = NULL;
Data *d;
Data_Get_Struct(self, Data, d);
@@ -223,10 +220,12 @@
}
else if (!NIL_P(r_encoding)) {
encoding = rb_enc_get(r_encoding);
}
+ // In case #raw_parse is called multiple times on the same parser. Note that
+ // using IO methods on a re-used parser can cause segmentation faults.
rb_ivar_set(self, s_row, Qnil);
buffer_size = BUFSIZE;
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
@@ -242,42 +241,40 @@
if (io) {
buf = ALLOC_N(char, buffer_size);
}
-#line 248 "ext/fastcsv/fastcsv.c"
+#line 247 "ext/fastcsv/fastcsv.c"
{
cs = raw_parse_start;
ts = 0;
te = 0;
act = 0;
}
-#line 366 "ext/fastcsv/fastcsv.rl"
+#line 350 "ext/fastcsv/fastcsv.rl"
while (!done) {
VALUE str;
char *p, *pe;
- int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff, mark_row_sep_diff;
+ int len, space = buffer_size - have, tokstart_diff, tokend_diff, start_diff;
if (io) {
if (space == 0) {
// Not moving d->start will cause intermittent segmentation faults.
tokstart_diff = ts - buf;
tokend_diff = te - buf;
start_diff = d->start - buf;
- mark_row_sep_diff = mark_row_sep - buf;
buffer_size += BUFSIZE;
REALLOC_N(buf, char, buffer_size);
space = buffer_size - have;
ts = buf + tokstart_diff;
te = buf + tokend_diff;
d->start = buf + start_diff;
- mark_row_sep = buf + mark_row_sep_diff;
}
p = buf + have;
// Reads "`length` bytes without any conversion (binary mode)."
// "The resulted string is always ASCII-8BIT encoding."
@@ -312,11 +309,11 @@
d->start = p;
}
pe = p + len;
-#line 318 "ext/fastcsv/fastcsv.c"
+#line 315 "ext/fastcsv/fastcsv.c"
{
if ( p == pe )
goto _test_eof;
switch ( cs )
{
@@ -331,31 +328,31 @@
break;
}
}
goto st4;
tr5:
-#line 49 "ext/fastcsv/fastcsv.rl"
+#line 46 "ext/fastcsv/fastcsv.rl"
{
if (p == ts) {
// Unquoted empty fields are nil, not "", in Ruby.
field = Qnil;
}
else if (p > ts) {
field = rb_enc_str_new(ts, p - ts, encoding);
ENCODE;
}
}
-#line 95 "ext/fastcsv/fastcsv.rl"
+#line 92 "ext/fastcsv/fastcsv.rl"
{
rb_ary_push(row, field);
field = Qnil;
}
-#line 166 "ext/fastcsv/fastcsv.rl"
+#line 148 "ext/fastcsv/fastcsv.rl"
{te = p+1;}
goto st4;
tr9:
-#line 138 "ext/fastcsv/fastcsv.rl"
+#line 120 "ext/fastcsv/fastcsv.rl"
{
if (d->start == 0 || p == d->start) {
rb_ivar_set(self, s_row, rb_str_new2(""));
}
else if (p > d->start) {
@@ -368,45 +365,32 @@
if (RARRAY_LEN(row)) {
rb_yield(row);
}
}
-#line 168 "ext/fastcsv/fastcsv.rl"
+#line 150 "ext/fastcsv/fastcsv.rl"
{te = p+1;}
goto st4;
tr12:
-#line 95 "ext/fastcsv/fastcsv.rl"
+#line 92 "ext/fastcsv/fastcsv.rl"
{
rb_ary_push(row, field);
field = Qnil;
}
-#line 166 "ext/fastcsv/fastcsv.rl"
+#line 148 "ext/fastcsv/fastcsv.rl"
{te = p+1;}
goto st4;
tr15:
-#line 168 "ext/fastcsv/fastcsv.rl"
+#line 150 "ext/fastcsv/fastcsv.rl"
{te = p;p--;}
goto st4;
tr16:
-#line 100 "ext/fastcsv/fastcsv.rl"
+#line 97 "ext/fastcsv/fastcsv.rl"
{
d->start = p;
-
- if (len_row_sep) {
- if (p - mark_row_sep != len_row_sep || row_sep[0] != *mark_row_sep || len_row_sep == 2 && row_sep[1] != *(mark_row_sep + 1)) {
- FREE;
-
- rb_raise(eError, "Unquoted fields do not allow \\r or \\n (line %d).", curline - 1);
- }
- }
- else {
- len_row_sep = p - mark_row_sep;
- row_sep = ALLOC_N(char, p - mark_row_sep);
- memcpy(row_sep, mark_row_sep, p - mark_row_sep);
- }
}
-#line 167 "ext/fastcsv/fastcsv.rl"
+#line 149 "ext/fastcsv/fastcsv.rl"
{te = p;p--;}
goto st4;
st4:
#line 1 "NONE"
{ts = 0;}
@@ -415,11 +399,11 @@
if ( ++p == pe )
goto _test_eof4;
case 4:
#line 1 "NONE"
{ts = p;}
-#line 421 "ext/fastcsv/fastcsv.c"
+#line 405 "ext/fastcsv/fastcsv.c"
switch( (*p) ) {
case 0: goto tr13;
case 10: goto tr3;
case 13: goto tr4;
case 34: goto tr14;
@@ -439,22 +423,22 @@
}
goto st1;
tr2:
#line 1 "NONE"
{te = p+1;}
-#line 49 "ext/fastcsv/fastcsv.rl"
+#line 46 "ext/fastcsv/fastcsv.rl"
{
if (p == ts) {
// Unquoted empty fields are nil, not "", in Ruby.
field = Qnil;
}
else if (p > ts) {
field = rb_enc_str_new(ts, p - ts, encoding);
ENCODE;
}
}
-#line 138 "ext/fastcsv/fastcsv.rl"
+#line 120 "ext/fastcsv/fastcsv.rl"
{
if (d->start == 0 || p == d->start) {
rb_ivar_set(self, s_row, rb_str_new2(""));
}
else if (p > d->start) {
@@ -467,42 +451,40 @@
if (RARRAY_LEN(row)) {
rb_yield(row);
}
}
-#line 168 "ext/fastcsv/fastcsv.rl"
+#line 150 "ext/fastcsv/fastcsv.rl"
{act = 3;}
goto st5;
st5:
if ( ++p == pe )
goto _test_eof5;
case 5:
-#line 480 "ext/fastcsv/fastcsv.c"
+#line 464 "ext/fastcsv/fastcsv.c"
switch( (*p) ) {
case 0: goto tr2;
case 10: goto tr3;
case 13: goto tr4;
case 34: goto tr15;
case 44: goto tr5;
}
goto st1;
tr3:
-#line 49 "ext/fastcsv/fastcsv.rl"
+#line 46 "ext/fastcsv/fastcsv.rl"
{
if (p == ts) {
// Unquoted empty fields are nil, not "", in Ruby.
field = Qnil;
}
else if (p > ts) {
field = rb_enc_str_new(ts, p - ts, encoding);
ENCODE;
}
}
-#line 117 "ext/fastcsv/fastcsv.rl"
+#line 101 "ext/fastcsv/fastcsv.rl"
{
- mark_row_sep = p;
-
curline++;
if (d->start == 0 || p == d->start) {
rb_ivar_set(self, s_row, rb_str_new2(""));
}
@@ -518,14 +500,12 @@
rb_yield(row);
row = rb_ary_new();
}
goto st6;
tr10:
-#line 117 "ext/fastcsv/fastcsv.rl"
+#line 101 "ext/fastcsv/fastcsv.rl"
{
- mark_row_sep = p;
-
curline++;
if (d->start == 0 || p == d->start) {
rb_ivar_set(self, s_row, rb_str_new2(""));
}
@@ -544,28 +524,26 @@
goto st6;
st6:
if ( ++p == pe )
goto _test_eof6;
case 6:
-#line 550 "ext/fastcsv/fastcsv.c"
+#line 530 "ext/fastcsv/fastcsv.c"
goto tr16;
tr4:
-#line 49 "ext/fastcsv/fastcsv.rl"
+#line 46 "ext/fastcsv/fastcsv.rl"
{
if (p == ts) {
// Unquoted empty fields are nil, not "", in Ruby.
field = Qnil;
}
else if (p > ts) {
field = rb_enc_str_new(ts, p - ts, encoding);
ENCODE;
}
}
-#line 117 "ext/fastcsv/fastcsv.rl"
+#line 101 "ext/fastcsv/fastcsv.rl"
{
- mark_row_sep = p;
-
curline++;
if (d->start == 0 || p == d->start) {
rb_ivar_set(self, s_row, rb_str_new2(""));
}
@@ -581,14 +559,12 @@
rb_yield(row);
row = rb_ary_new();
}
goto st7;
tr11:
-#line 117 "ext/fastcsv/fastcsv.rl"
+#line 101 "ext/fastcsv/fastcsv.rl"
{
- mark_row_sep = p;
-
curline++;
if (d->start == 0 || p == d->start) {
rb_ivar_set(self, s_row, rb_str_new2(""));
}
@@ -607,29 +583,29 @@
goto st7;
st7:
if ( ++p == pe )
goto _test_eof7;
case 7:
-#line 613 "ext/fastcsv/fastcsv.c"
+#line 589 "ext/fastcsv/fastcsv.c"
if ( (*p) == 10 )
goto st6;
goto tr16;
tr13:
#line 1 "NONE"
{te = p+1;}
-#line 49 "ext/fastcsv/fastcsv.rl"
+#line 46 "ext/fastcsv/fastcsv.rl"
{
if (p == ts) {
// Unquoted empty fields are nil, not "", in Ruby.
field = Qnil;
}
else if (p > ts) {
field = rb_enc_str_new(ts, p - ts, encoding);
ENCODE;
}
}
-#line 138 "ext/fastcsv/fastcsv.rl"
+#line 120 "ext/fastcsv/fastcsv.rl"
{
if (d->start == 0 || p == d->start) {
rb_ivar_set(self, s_row, rb_str_new2(""));
}
else if (p > d->start) {
@@ -642,46 +618,46 @@
if (RARRAY_LEN(row)) {
rb_yield(row);
}
}
-#line 168 "ext/fastcsv/fastcsv.rl"
+#line 150 "ext/fastcsv/fastcsv.rl"
{act = 3;}
goto st8;
st8:
if ( ++p == pe )
goto _test_eof8;
case 8:
-#line 655 "ext/fastcsv/fastcsv.c"
+#line 631 "ext/fastcsv/fastcsv.c"
switch( (*p) ) {
case 10: goto tr15;
case 13: goto tr15;
case 34: goto tr15;
case 44: goto tr15;
}
goto st1;
tr14:
-#line 41 "ext/fastcsv/fastcsv.rl"
+#line 38 "ext/fastcsv/fastcsv.rl"
{
unclosed_line = curline;
}
goto st2;
st2:
if ( ++p == pe )
goto _test_eof2;
case 2:
-#line 673 "ext/fastcsv/fastcsv.c"
+#line 649 "ext/fastcsv/fastcsv.c"
switch( (*p) ) {
case 0: goto st0;
case 34: goto tr8;
}
goto st2;
st0:
cs = 0;
goto _out;
tr8:
-#line 60 "ext/fastcsv/fastcsv.rl"
+#line 57 "ext/fastcsv/fastcsv.rl"
{
if (p == ts) {
field = rb_enc_str_new("", 0, encoding);
ENCODE;
}
@@ -712,20 +688,20 @@
if (copy != NULL) {
free(copy);
}
}
}
-#line 45 "ext/fastcsv/fastcsv.rl"
+#line 42 "ext/fastcsv/fastcsv.rl"
{
unclosed_line = 0;
}
goto st3;
st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
-#line 727 "ext/fastcsv/fastcsv.c"
+#line 703 "ext/fastcsv/fastcsv.c"
switch( (*p) ) {
case 0: goto tr9;
case 10: goto tr10;
case 13: goto tr11;
case 34: goto st2;
@@ -755,12 +731,19 @@
}
_out: {}
}
-#line 427 "ext/fastcsv/fastcsv.rl"
+#line 409 "ext/fastcsv/fastcsv.rl"
if (done && cs < raw_parse_first_final) {
+ if (d->start == 0 || p == d->start) {
+ rb_ivar_set(self, s_row, rb_str_new2(""));
+ }
+ else if (p > d->start) {
+ rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
+ }
+
FREE;
if (unclosed_line) {
rb_raise(eError, "Unclosed quoted field on line %d.", unclosed_line);
}