ext/c_levenshtein/levenshtein.c in phonetics-1.5.2 vs ext/c_levenshtein/levenshtein.c in phonetics-1.5.3

- old
+ new

@@ -1,35 +1,33 @@ #include "ruby.h" +#include <stdbool.h> #include "./phonetic_cost.h" -#define NDEBUG true -#ifdef NDEBUG -#define debug(M, ...) -#else -#define debug(M, ...) printf(M, ##__VA_ARGS__) -#endif +#define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__) VALUE Binding = Qnil; /* Function declarations */ void Init_c_levenshtein(); -void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length); -void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length); -VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2); +void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose); +void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose); +VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose); + /* Function implemitations */ void Init_c_levenshtein() { Binding = rb_define_module("PhoneticsLevenshteinCBinding"); - rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 2); + rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 3); } -VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2){ +VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose){ VALUE *string1_ruby = RARRAY_PTR(_string1); VALUE *string2_ruby = RARRAY_PTR(_string2); + bool verbose = _verbose; int string1_length = (int) RARRAY_LEN(_string1); int string2_length = (int) RARRAY_LEN(_string2); // We name them as 'strings' but in C-land we're representing our strings as // arrays of `int`s, where each int represents a consistent (if unusual) // encoding of a grapheme cluster (a symbol for a phoneme). @@ -45,11 +43,11 @@ cost; int i, j; // Frequently overwritten loop vars // Guard clause for two empty strings if (string1_length == 0 && string2_length == 0) - return DBL2NUM(0.0d); + return DBL2NUM(0.0); // // Intial data setup // @@ -72,14 +70,14 @@ // this case the last spot in the array) // // First, set the top row and left column of the matrix using the sequential // phonetic edit distance of string1 and string2, respectively - set_initial(d, string1, string1_length, string2, string2_length); + set_initial(d, string1, string1_length, string2, string2_length, verbose); debug("before:\n"); - print_matrix(d, string1, string1_length, string2, string2_length); + print_matrix(d, string1, string1_length, string2, string2_length, verbose); // Then walk through the matrix and fill in each cell with the lowest-cost // phonetic edit distance for that matrix cell. // (Skipping i=0 and j=0 because set_initial filled in all cells where i // or j are zero-valued) @@ -115,11 +113,11 @@ min = replace; } d[(j * (string1_length+1)) + i] = min + cost; debug("\n"); - print_matrix(d, string1, string1_length, string2, string2_length); + print_matrix(d, string1, string1_length, string2, string2_length, verbose); } } // The final element in the `d` array is the value of the shortest path from // the top-left to the bottom-right of the matrix. @@ -139,11 +137,11 @@ // The second value is always the phonetic distance between the first // phonemes of each string. // Subsequent values are the cumulative phonetic distance between each // phoneme within the same string. // "aek" -> [0.0, 1.0, 1.61, 2.61] -void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length) { +void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) { double distance_between_first_phonemes; int i, j; if (string1_length == 0 || string2_length == 0) { @@ -152,10 +150,11 @@ distance_between_first_phonemes = 0.0; } else { distance_between_first_phonemes = phonetic_cost(string1[0], string2[0]); } + d[0] = (double) 0.0; // Set the first value of string1's sequential phonetic calculation (maps to // cell x=1, y=0) d[1] = distance_between_first_phonemes; // And of string2 (maps to cell x=0, y=1) if (string2_length > 0) { @@ -174,10 +173,10 @@ d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]); } } // A handy visualization for developers -void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length) { +void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) { int i, j; debug(" "); for (i=0; i < string1_length; i++) { debug("%8.d ", string1[i]); }