split/Dvector/dvector.c in tioga-1.6 vs split/Dvector/dvector.c in tioga-1.7
- old
+ new
@@ -308,10 +308,27 @@
/* we set dirty to 0 */
d->dirty = 0;
return ary;
}
+/*
+ * Makes a Dvector of class klass holding a copy of the first len doubles
+ * found at data. No additional capacity is reserved, except that an empty
+ * vector (len == 0) still gets a one-element buffer, so capa is 1 then.
+ *
+ * Raises ArgumentError when len is negative. The data array is only read;
+ * the caller keeps ownership of it.
+ */
+PRIVATE VALUE make_dvector_from_data(VALUE klass, long len, double * data) {
+    VALUE ary;
+    Dvector *d;
+    long capa;
+    /* Validate before allocating anything. */
+    if (len < 0) {
+        rb_raise(rb_eArgError, "negative dvector size (or size too big)");
+    }
+    ary = dvector_alloc(klass);
+    d = Get_Dvector(ary);
+    capa = (len == 0) ? 1 : len;
+    d->ptr = ALLOC_N(double, capa);
+    if (len > 0) {
+        /* Copy exactly len values: never read data[0] of an empty source. */
+        MEMCPY(d->ptr, data, double, len);
+    } else {
+        d->ptr[0] = 0.0; /* keep the spare slot initialized */
+    }
+    /* len is published only once the buffer exists */
+    d->len = len;
+    d->capa = capa;
+    /* we set dirty to 0 */
+    d->dirty = 0;
+    return ary;
+}
+
/* Creates a cDvector by forwarding len and capa to make_new_dvector. */
PRIVATE VALUE dvector_new2(long len, long capa) {
    VALUE fresh = make_new_dvector(cDvector, len, capa);
    return fresh;
}
PRIVATE VALUE dvector_new() {
@@ -1428,11 +1445,11 @@
StringValue(sep);
len += RSTRING_LEN(sep) * (d->len - 1); /* So it works for ruby 1.9 */
}
result = rb_str_buf_new(len);
for (i=0; i < d->len; i++) {
- sprintf(buff, "%g", d->ptr[i]);
+ snprintf(buff,sizeof(buff), "%g", d->ptr[i]);
tmp = rb_str_new2(buff);
if (i > 0 && !NIL_P(sep)) rb_str_buf_append(result, sep);
rb_str_buf_append(result, tmp);
}
if (taint) OBJ_TAINT(result);
@@ -2095,23 +2112,16 @@
* a -> Dvector[ -1, -2, -3 ]
*/
VALUE dvector_replace(VALUE dest, VALUE orig) {
VALUE shared;
Dvector *org, *d;
- dvector_modify(dest);
+ dvector_modify(dest); // take care of any sharing issues.
orig = dvector_to_dvector(orig); /* it might be some kind of Array rather than a Dvector */
if (dest == orig) return dest;
org = Get_Dvector(orig);
d = Get_Dvector(dest);
- if (d->ptr) {
- if (0 && d->capa >= org->len && d->shared == Qnil) {
- d->len = org->len;
- MEMCPY(d->ptr, org->ptr, double, d->len);
- return dest;
- }
- free(d->ptr);
- }
+ if (d->ptr) free(d->ptr); // we know it isn't shared because we did dvector_modify above
shared = dvector_make_shared(orig);
org = Get_Dvector(shared);
d->ptr = org->ptr;
d->len = org->len;
d->shared = shared;
@@ -4319,20 +4329,63 @@
*/
VALUE dvector_div_bang(VALUE ary, VALUE arg) {
    /* Forward to dvector_apply_math_op2_bang with do_div as the operation. */
    VALUE outcome = dvector_apply_math_op2_bang(ary, arg, do_div);
    return outcome;
}
+/*
+ * Reads the next line of file into the growable buffer *buff_ptr.
+ *
+ * *buff_ptr must point to a malloc'ed buffer of *len_ptr bytes. When the
+ * line does not fit, the buffer is enlarged with realloc (both *buff_ptr
+ * and *len_ptr are updated, and only after realloc succeeded), the file is
+ * repositioned to the start of the line and the read is retried, at most
+ * max_tries times.
+ *
+ * Returns the buffer holding the line (newline included when the file has
+ * one), or NULL when fgets reports end of file or an error, when
+ * ftell/fseek fails, when the buffer cannot be grown, or when the line
+ * still does not fit after max_tries attempts. After a NULL return
+ * *buff_ptr is still a valid allocation of *len_ptr bytes that the caller
+ * must free.
+ */
+static char *fill_read_buffer(char **buff_ptr, int *len_ptr, FILE *file) {
+    const int max_tries = 10;
+    const int int_max = (int)(~0U >> 1); /* largest int, without needing <limits.h> */
+    char *buff, *new_buff;
+    int len, new_len, i;
+    long filepos;
+    if (buff_ptr == NULL || len_ptr == NULL || file == NULL) return NULL;
+    if (*buff_ptr == NULL || *len_ptr < 1) return NULL; /* buff[len-1] must exist */
+    filepos = ftell(file);
+    if (filepos == -1) {
+        fprintf(stderr, "ftell failed\n");
+        return NULL;
+    }
+    for (i = 0; i < max_tries; i++) {
+        buff = *buff_ptr;
+        len = *len_ptr;
+        /* Sentinel: fgets overwrites the last byte with '\0' only when the
+           line fills the whole buffer. */
+        buff[len-1] = '1';
+        if (fgets(buff, len, file) == NULL) return NULL; /* end of file */
+        if (buff[len-1] != '\0') return buff; /* line fit with room to spare */
+        /* Buffer is full; the line is still complete if it ends right here. */
+        if (len >= 2 && buff[len-2] == '\n') return buff;
+        /* ran out of room -- make buffer larger and try again */
+        if (len > (int_max - 100) / 10) break; /* 10*len + 100 would overflow int */
+        new_len = 10*len + 100;
+        new_buff = (char *)realloc(buff, new_len);
+        if (new_buff == NULL) break; /* old buffer and old length stay valid */
+        *buff_ptr = new_buff;
+        *len_ptr = new_len;
+        if (fseek(file, filepos, SEEK_SET) != 0) {
+            fprintf(stderr, "fseek failed\n");
+            return NULL;
+        }
+    }
+    return NULL;
+}
+
PRIVATE
/*======================================================================*/
VALUE Read_Dvectors(char *filename, VALUE destinations, int first_row_of_file, int number_of_rows) {
FILE *file = NULL;
VALUE col_obj, cols_obj, *cols_ptr = NULL;
Dvector *d;
double v;
int last_row_of_file;
- const int buff_len = 10000;
- char buff[buff_len], *num_str, *pend, c, *cptr;
+ int buff_len = 100;
+ char *buff, *num_str, *pend, c, *cptr;
int num_cols = 0, i, row, col, buff_loc, skip = first_row_of_file - 1;
last_row_of_file = (number_of_rows == -1)? -1 : first_row_of_file + number_of_rows - 1;
if ((last_row_of_file != -1 && last_row_of_file < first_row_of_file) || filename == NULL) return false;
if (destinations != Qnil) {
cols_obj = rb_Array(destinations);
@@ -4353,19 +4406,25 @@
}
}
if ((file=fopen(filename,"r")) == NULL) {
rb_raise(rb_eArgError, "ERROR: read cannot open %s", filename);
}
+ buff = (char *)malloc(buff_len);
+ if (buff == NULL) {
+ fclose(file);
+ rb_raise(rb_eArgError, "ERROR: allocation of read buffer failed");
+ }
for (i = 0; i < skip; i++) { /* skip over initial lines */
- if (fgets(buff, buff_len, file)==NULL) {
+ if (fill_read_buffer(&buff, &buff_len, file)==NULL) {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: read reached end of file before reaching line %i in %s",
first_row_of_file, filename);
}
}
for (row = 0, i = first_row_of_file; last_row_of_file == -1 || i <= last_row_of_file; row++, i++) {
- if (fgets(buff, buff_len, file)==NULL) break; /* have reached end of file */
+ if (fill_read_buffer(&buff, &buff_len, file)==NULL) break; /* have reached end of file */
if (destinations == Qnil) { /* create destinations */
buff_loc = 0;
while (true) {
while (isspace(buff[buff_loc])) buff_loc++; /* skip leading blanks */
if (buff[buff_loc] == '\0') break;
@@ -4384,16 +4443,18 @@
buff_loc = 0;
for (col = 0; col < num_cols; col++) {
while (isspace(buff[buff_loc])) buff_loc++; /* skip leading blanks */
if (buff[buff_loc] == '\0') {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "read reached end of line looking for column %i in line %i of %s", col+1, i, filename);
}
num_str = buff+buff_loc;
while (isgraph(buff[buff_loc])) buff_loc++; /* include non-blanks */
if (buff[buff_loc] == '\0') {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: read reached end of line looking for column %i in line %i of %s", col+1, i, filename);
}
col_obj = cols_ptr[col];
if (col_obj == Qnil) continue;
Data_Get_Struct(col_obj, Dvector, d);
@@ -4408,18 +4469,20 @@
cptr = pend+5; c = *cptr; pend[5] = ' '; pend[4] = pend[3]; pend[3] = pend[2];
pend[2] = pend[1]; pend[1] = pend[0]; pend[0] = 'E';
v = strtod(num_str,&pend); *cptr = c; buff_loc = pend - buff;
} else {
fclose(file);
+ free(buff);
pend[0] = 0;
rb_raise(rb_eArgError, "ERROR: unreadable value in file %s in line %i: %s", filename, i , buff+buff_loc);
}
}
}
if (!is_okay_number(v)) {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: bad value %g in line %i of %s -- %s", v, i, filename, num_str);
}
if (row >= d->capa)
Dvector_Store_Double(col_obj, row, v);
else {
@@ -4428,10 +4491,11 @@
d->ptr[row] = v;
}
}
}
fclose(file);
+ free(buff);
return destinations;
}
PRIVATE
/*
@@ -4465,12 +4529,12 @@
VALUE Read_Rows_of_Dvectors(char *filename, VALUE destinations, int first_row_of_file) {
FILE *file = NULL;
VALUE row_obj, rows_obj, *rows_ptr = NULL;
Dvector *d;
double v, *row_data;
- const int buff_len = 10000;
- char buff[buff_len], *num_str, *pend, c, *cptr;
+ int buff_len = 1000;
+ char *buff, *num_str, *pend, c, *cptr;
int num_rows = 0, i, row, col, buff_loc, c_loc, skip = first_row_of_file - 1;
rows_obj = rb_Array(destinations);
num_rows = RARRAY(rows_obj)->len;
rows_ptr = RARRAY(rows_obj)->ptr;
for (i = 0; i < num_rows; i++) { /* first pass to check validity */
@@ -4487,20 +4551,27 @@
d->len = 0;
}
if ((file=fopen(filename,"r")) == NULL) {
rb_raise(rb_eArgError, "ERROR: read_rows cannot open %s", filename);
}
+ buff = (char *)malloc(buff_len);
+ if (buff == NULL) {
+ fclose(file);
+ rb_raise(rb_eArgError, "ERROR: allocation of read buffer failed");
+ }
for (i = 0; i < skip; i++) { /* skip over initial lines */
- if (fgets(buff, buff_len, file)==NULL) {
+ if (fill_read_buffer(&buff, &buff_len, file)==NULL) {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: read_rows reached end of file before reaching line %i in %s",
first_row_of_file, filename);
}
}
for (row = 0, i = first_row_of_file; row < num_rows; row++, i++) {
- if (fgets(buff, buff_len, file)==NULL) {
+ if (fill_read_buffer(&buff, &buff_len, file)==NULL) {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: read_rows reached end of file at line %i in %s", i, filename);
}
row_obj = rows_ptr[row];
if (row_obj == Qnil) continue;
d = Get_Dvector(row_obj);
@@ -4527,10 +4598,11 @@
}
}
if (!is_okay_number(v)) {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: bad value %g in line i% of file %s", v, i, filename);
}
if (col < d->capa) { row_data[col] = v; d->len = col+1; }
else {
Dvector_Store_Double(row_obj, col, v);
@@ -4542,10 +4614,11 @@
REALLOC_N(d->ptr, double, col);
d->capa = col;
}
}
fclose(file);
+ free(buff);
return destinations;
}
PRIVATE
@@ -4567,34 +4640,42 @@
rb_raise(rb_eArgError, "wrong # of arguments(%d) for read_rows", argc);
if (argc > 2) arg3 = NUM2INT(argv[2]);
return Read_Rows_of_Dvectors(StringValueCStr(argv[0]),argv[1],arg3);
klass = Qnil;
}
+
PRIVATE
VALUE Read_Row(char *filename, int row, VALUE row_ary) {
FILE *file = NULL;
- const int buff_len = 10000;
- char buff[buff_len], *num_str, *pend, c, *cptr;
+ int buff_len = 1000;
+ char *buff, *num_str, *pend, c, *cptr;
int i, col, buff_loc;
double v;
if (row <= 0) {
rb_raise(rb_eArgError, "ERROR: read_row line must be positive (not %i) for file %s", row, filename);
}
if (filename == NULL || (file=fopen(filename,"r")) == NULL) {
rb_raise(rb_eArgError, "ERROR: read_row cannot open %s", filename);
}
+ buff = (char *)malloc(buff_len);
+ if (buff == NULL) {
+ fclose(file);
+ rb_raise(rb_eArgError, "ERROR: allocation of read buffer failed");
+ }
for (i = 0; i < row; i++) { /* read lines until reach desired row */
- if (fgets(buff, buff_len, file)==NULL) {
+ if (fill_read_buffer(&buff, &buff_len, file)==NULL) {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: read_row reached end of file before reaching line %i in %s",
row, filename);
}
}
if (row_ary == Qnil) row_ary = dvector_new();
else if (is_a_dvector(row_ary)) dvector_clear(row_ary);
else {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: destination for read_row must be a Dvector");
}
buff_loc = 0;
for (col = 0; ; col++) {
while (isspace(buff[buff_loc])) buff_loc++; /* skip leading blanks */
@@ -4612,23 +4693,26 @@
cptr = pend+5; c = *cptr; pend[5] = ' '; pend[4] = pend[3]; pend[3] = pend[2];
pend[2] = pend[1]; pend[1] = pend[0]; pend[0] = 'E';
v = strtod(num_str,&pend); *cptr = c; buff_loc = pend - buff;
} else {
fclose(file);
+ free(buff);
pend[0] = 0;
rb_raise(rb_eArgError, "ERROR: unreadable value in file %s in line %i: %s", filename, i , buff+buff_loc);
}
}
}
if (!is_okay_number(v)) {
fclose(file);
+ free(buff);
rb_raise(rb_eArgError, "ERROR: bad value %g in line %i of file %s", v, i, filename);
}
Dvector_Store_Double(row_ary, col, v);
}
fclose(file);
+ free(buff);
return row_ary;
}
PRIVATE
/*
@@ -5159,10 +5243,162 @@
}
}
return retval;
}
+
+/*
+  :call-seq:
+    Dvector.fast_fancy_read(stream, options) => Array_of_Dvectors
+
+  Reads data from an IO stream and separates it into columns of data
+  according to the _options_, a hash holding the following elements
+  (compulsory, but you can use FANCY_READ_DEFAULTS):
+  * 'sep': a regular expression that separates the entries
+  * 'comments': any line matching this will be skipped
+  * 'skip_first': skips that many lines before reading anything
+  * 'index_col': if true, the first column returned contains the
+    number of the line read. *This option is read but currently
+    ignored !*
+  * 'remove_space': whether to remove spaces at the beginning of a line
+  * 'default': what to put when nothing was found but a number must be used
+
+  Lines containing only whitespace are always skipped. Columns that show
+  up for the first time after some lines were already read are filled
+  with 'default' for those earlier lines, and so are the missing trailing
+  columns of short lines.
+
+  As a side note, the read time was found to be highly non-linear, which
+  suggests that the read is memory-allocation/copying-limited, at least
+  for big files. An internal memory allocation with an aggressive policy
+  should solve that, that is, not using Dvectors directly (and it is way
+  faster to store anyway). This function therefore collects the values in
+  plain C arrays, doubled in size whenever they are full, and converts
+  them to Dvectors only at the very end.
+*/
+static VALUE dvector_fast_fancy_read(VALUE self, VALUE stream, VALUE options)
+{
+    /* First, we read up options: */
+    double def = rb_num2dbl(rb_hash_aref(options,
+                                         rb_str_new2("default")));
+    int remove_space = RTEST(rb_hash_aref(options,
+                                          rb_str_new2("remove_space")));
+    int index_col = RTEST(rb_hash_aref(options,
+                                       rb_str_new2("index_col")));
+    /* NUM2LONG checks its argument: a missing (nil) or non-integer
+       'skip_first' raises instead of being decoded as a garbage count,
+       which is what FIX2LONG would do. */
+    long skip_first = NUM2LONG(rb_hash_aref(options,
+                                            rb_str_new2("skip_first")));
+    VALUE sep = rb_hash_aref(options, rb_str_new2("sep"));
+    VALUE comments = rb_hash_aref(options, rb_str_new2("comments"));
+
+    /* Then, some various variables: */
+    VALUE line;
+
+    ID chomp_id = rb_intern("chomp!");
+    ID gets_id = rb_intern("gets");
+    long line_number = 0;
+
+    /*
+      Now comes the fun part - rudimentary vectors management
+
+      NOTE(review): these buffers are only released at the end of the
+      function; if one of the Ruby calls in the loop below raises, they
+      are not freed.
+    */
+    int nb_vectors = 0;           /* The number of vectors currently created */
+    int current_size = 10;        /* The number of slots available */
+    double ** vectors = ALLOC_N(double *, current_size);
+    long index = 0;               /* The current index in the vectors */
+    int allocated_size = 5004;    /* The size available in the vectors */
+
+    int i;
+
+    /* The return value */
+    VALUE ary;
+
+    (void)index_col; /* accepted for compatibility, not implemented yet */
+
+    /* We use a real gets so we can also rely on StringIO, for instance */
+    while (RTEST(line = rb_funcall(stream, gets_id, 0))) {
+        VALUE pre, post, match;
+        const char * line_ptr;
+        int col = 0;
+        line_number++;
+        /* Whether we should skip the line... */
+        if (skip_first >= line_number)
+            continue;
+
+        /* We check for a blank line using isspace; the cast keeps bytes
+           above 127 from reaching isspace as negative values. */
+        line_ptr = StringValueCStr(line);
+        while (*line_ptr && isspace((unsigned char)*line_ptr))
+            line_ptr++;
+        if (! *line_ptr)
+            continue;               /* We found a blank line */
+        if (remove_space)           /* We replace the contents of the line */
+            line = rb_str_new2(line_ptr);
+
+        /* ... or a comment line */
+        if (RTEST(comments) && RTEST(rb_reg_match(comments, line)))
+            continue;
+
+        /* Then, we remove the newline: */
+        post = line;
+        rb_funcall(post, chomp_id, 0);
+
+        /* We iterate over the different portions between
+           matches
+        */
+        while (RTEST(post)) {
+            const char * a;
+            char * b;
+            if (RTEST(rb_reg_match(sep, post))) {
+                match = rb_gv_get("$~");
+                pre = rb_reg_match_pre(match);
+                post = rb_reg_match_post(match);
+            }
+            else {
+                /* No separator left: the rest of the line is the last entry */
+                pre = post;
+                post = Qnil;
+            }
+            a = StringValueCStr(pre);
+            double c = strtod(a, &b);
+            if (b == a)             /* nothing could be parsed */
+                c = def;
+            if (col >= nb_vectors) {
+                nb_vectors++;
+                /* We need to create a new vector */
+                if (col >= current_size) { /* Increase the available size */
+                    current_size += 5;
+                    REALLOC_N(vectors, double * , current_size);
+                }
+
+                double * vals = vectors[col] = ALLOC_N(double, allocated_size);
+                /* Filling it with the default value */
+                for (i = 0; i < index; i++) {
+                    vals[i] = def;
+                }
+            }
+            vectors[col][index] = c;
+            col++;
+        }
+        /* Now, we finish the line */
+        for (; col < nb_vectors; col++)
+            vectors[col][index] = def;
+        index++;
+        /* Now, we reallocate memory if necessary */
+        if (index >= allocated_size) {
+            allocated_size *= 2;    /* We double the size */
+            for (col = 0; col < nb_vectors; col++)
+                REALLOC_N(vectors[col], double, allocated_size);
+        }
+    }
+    /* Now, we make up the array */
+    ary = rb_ary_new();
+    for (i = 0; i < nb_vectors; i++) {
+        /* We create a vector */
+        rb_ary_store(ary, i, make_dvector_from_data(cDvector, index, vectors[i]));
+        /* And free the memory */
+        free(vectors[i]);
+    }
+    free(vectors);
+    return ary;
+}
+
+
/*
* Document-class: Dobjects::Dvector
*
* Dvectors are a specialized implementation of one-dimensional arrays of double precision floating point numbers.
* They are intended for use in applications needing efficient processing of large vectors of numeric data.
@@ -5234,11 +5470,12 @@
rb_define_singleton_method(cDvector, "max_of_many", dvector_max_of_many, 1);
rb_define_singleton_method(cDvector, "is_a_dvector", dvector_is_a_dvector, 1);
- rb_define_method(cDvector, "make_bezier_control_points_for_cubic_in_x", dvector_make_bezier_control_points_for_cubic_in_x, 6);
+ rb_define_method(cDvector, "make_bezier_control_points_for_cubic_in_x",
+ dvector_make_bezier_control_points_for_cubic_in_x, 6);
rb_define_method(cDvector, "initialize", dvector_initialize, -1);
rb_define_method(cDvector, "initialize_copy", dvector_replace, 1);
rb_define_method(cDvector, "tridag", dvector_tridag, 4);
@@ -5472,9 +5709,14 @@
rb_define_method(cDvector, "_dump", dvector_dump, 1);
rb_define_singleton_method(cDvector, "_load", dvector_load, 1);
/* simple convolution */
rb_define_method(cDvector, "convolve", dvector_convolve, 2);
+
+ /* Fast fancy read: */
+ rb_define_singleton_method(cDvector, "fast_fancy_read",
+ dvector_fast_fancy_read, 2);
+
dvector_output_fs = Qnil;
rb_global_variable(&dvector_output_fs);
dvector_output_fs = rb_str_new2(" ");