ext/c_levenshtein/levenshtein.c in phonetics-1.5.3 vs ext/c_levenshtein/levenshtein.c in phonetics-1.5.4

- old
+ new

@@ -53,13 +53,13 @@ for (i = 0; i < string1_length; i++) { string1[i] = NUM2INT(string1_ruby[i]); debug("string1[%d] = %d\n", i, string1[i]); } - for (i = 0; i < string2_length; i++) { - string2[i] = NUM2INT(string2_ruby[i]); - debug("string2[%d] = %d\n", i, string2[i]); + for (j = 0; j < string2_length; j++) { + string2[j] = NUM2INT(string2_ruby[j]); + debug("string2[%d] = %d\n", i, string2[j]); } // one-dimensional representation of 2 dimentional array len(string1)+1 * // len(string2)+1 d = malloc((sizeof(double)) * (string1_length+1) * (string2_length+1)); @@ -79,12 +79,12 @@ // Then walk through the matrix and fill in each cell with the lowest-cost // phonetic edit distance for that matrix cell. // (Skipping i=0 and j=0 because set_initial filled in all cells where i // or j are zero-valued) - for(j = 1; j <= string2_length; j++){ - for(i = 1; i <= string1_length; i++){ + for (j = 1; j <= string2_length; j++){ + for (i = 1; i <= string1_length; i++){ // The cost of deletion or addition is the Levenshtein distance // calculation (the value in the cell to the left, upper-left, or above) // plus the phonetic distance between the sound we're moving from to the // new one. @@ -139,28 +139,26 @@ // Subsequent values are the cumulative phonetic distance between each // phoneme within the same string. // "aek" -> [0.0, 1.0, 1.61, 2.61] void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) { - double distance_between_first_phonemes; + double initial_distance; int i, j; if (string1_length == 0 || string2_length == 0) { - distance_between_first_phonemes = 0.0; - } else if (string1[0] == string2[0]) { - distance_between_first_phonemes = 0.0; + initial_distance = 0.0; } else { - distance_between_first_phonemes = phonetic_cost(string1[0], string2[0]); + initial_distance = 1.0; } + // The top-left is 0, the cell to the right and down are each 1 to start d[0] = (double) 0.0; - // Set the first value of string1's sequential phonetic calculation (maps to - // cell x=1, y=0) - d[1] = distance_between_first_phonemes; - // And of string2 (maps to cell x=0, y=1) + if (string1_length > 0) { + d[1] = initial_distance; + } if (string2_length > 0) { - d[string1_length+1] = distance_between_first_phonemes; + d[string1_length+1] = initial_distance; } debug("string1 length: %d\n", string1_length); for (i=2; i <= string1_length; i++) { // The cost of adding the next phoneme is the cost so far plus the phonetic @@ -169,9 +167,17 @@ } debug("string2 length: %d\n", string2_length); for (j=2; j <= string2_length; j++) { // The same exact pattern down the left side of the matrix d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]); + } + + // And zero out the rest. If you're reading this please edit this to be + // faster. + for (j=1; j <= string2_length; j++) { + for (i=1; i <= string1_length; i++) { + d[j * (string1_length+1) + i] = (double) 0.0; + } } } // A handy visualization for developers void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {