ext/c_levenshtein/levenshtein.c in phonetics-1.5.3 vs ext/c_levenshtein/levenshtein.c in phonetics-1.5.4
- old
+ new
@@ -53,13 +53,13 @@
for (i = 0; i < string1_length; i++) {
string1[i] = NUM2INT(string1_ruby[i]);
debug("string1[%d] = %d\n", i, string1[i]);
}
- for (i = 0; i < string2_length; i++) {
- string2[i] = NUM2INT(string2_ruby[i]);
- debug("string2[%d] = %d\n", i, string2[i]);
+ for (j = 0; j < string2_length; j++) {
+ string2[j] = NUM2INT(string2_ruby[j]);
+ debug("string2[%d] = %d\n", i, string2[j]);
}
// one-dimensional representation of 2 dimentional array len(string1)+1 *
// len(string2)+1
d = malloc((sizeof(double)) * (string1_length+1) * (string2_length+1));
@@ -79,12 +79,12 @@
// Then walk through the matrix and fill in each cell with the lowest-cost
// phonetic edit distance for that matrix cell.
// (Skipping i=0 and j=0 because set_initial filled in all cells where i
// or j are zero-valued)
- for(j = 1; j <= string2_length; j++){
- for(i = 1; i <= string1_length; i++){
+ for (j = 1; j <= string2_length; j++){
+ for (i = 1; i <= string1_length; i++){
// The cost of deletion or addition is the Levenshtein distance
// calculation (the value in the cell to the left, upper-left, or above)
// plus the phonetic distance between the sound we're moving from to the
// new one.
@@ -139,28 +139,26 @@
// Subsequent values are the cumulative phonetic distance between each
// phoneme within the same string.
// "aek" -> [0.0, 1.0, 1.61, 2.61]
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
- double distance_between_first_phonemes;
+ double initial_distance;
int i, j;
if (string1_length == 0 || string2_length == 0) {
- distance_between_first_phonemes = 0.0;
- } else if (string1[0] == string2[0]) {
- distance_between_first_phonemes = 0.0;
+ initial_distance = 0.0;
} else {
- distance_between_first_phonemes = phonetic_cost(string1[0], string2[0]);
+ initial_distance = 1.0;
}
+ // The top-left is 0, the cell to the right and down are each 1 to start
d[0] = (double) 0.0;
- // Set the first value of string1's sequential phonetic calculation (maps to
- // cell x=1, y=0)
- d[1] = distance_between_first_phonemes;
- // And of string2 (maps to cell x=0, y=1)
+ if (string1_length > 0) {
+ d[1] = initial_distance;
+ }
if (string2_length > 0) {
- d[string1_length+1] = distance_between_first_phonemes;
+ d[string1_length+1] = initial_distance;
}
debug("string1 length: %d\n", string1_length);
for (i=2; i <= string1_length; i++) {
// The cost of adding the next phoneme is the cost so far plus the phonetic
@@ -169,9 +167,17 @@
}
debug("string2 length: %d\n", string2_length);
for (j=2; j <= string2_length; j++) {
// The same exact pattern down the left side of the matrix
d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]);
+ }
+
+ // And zero out the rest. If you're reading this please edit this to be
+ // faster.
+ for (j=1; j <= string2_length; j++) {
+ for (i=1; i <= string1_length; i++) {
+ d[j * (string1_length+1) + i] = (double) 0.0;
+ }
}
}
// A handy visualization for developers
void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {