ext/c_levenshtein/levenshtein.c in phonetics-1.5.2 vs ext/c_levenshtein/levenshtein.c in phonetics-1.5.3
- old
+ new
@@ -1,35 +1,33 @@
#include "ruby.h"
+#include <stdbool.h>
#include "./phonetic_cost.h"
-#define NDEBUG true
-#ifdef NDEBUG
-#define debug(M, ...)
-#else
-#define debug(M, ...) printf(M, ##__VA_ARGS__)
-#endif
+#define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__)
VALUE Binding = Qnil;
/* Function declarations */
void Init_c_levenshtein();
-void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length);
-void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length);
-VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2);
+void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
+void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
+VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose);
+
/* Function implementations */
void Init_c_levenshtein() {
Binding = rb_define_module("PhoneticsLevenshteinCBinding");
- rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 2);
+ rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 3);
}
-VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2){
+VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose){
VALUE *string1_ruby = RARRAY_PTR(_string1);
VALUE *string2_ruby = RARRAY_PTR(_string2);
+ bool verbose = _verbose;
int string1_length = (int) RARRAY_LEN(_string1);
int string2_length = (int) RARRAY_LEN(_string2);
// We name them as 'strings' but in C-land we're representing our strings as
// arrays of `int`s, where each int represents a consistent (if unusual)
// encoding of a grapheme cluster (a symbol for a phoneme).
@@ -45,11 +43,11 @@
cost;
int i, j; // Frequently overwritten loop vars
// Guard clause for two empty strings
if (string1_length == 0 && string2_length == 0)
- return DBL2NUM(0.0d);
+ return DBL2NUM(0.0);
//
// Initial data setup
//
@@ -72,14 +70,14 @@
// this case the last spot in the array)
//
// First, set the top row and left column of the matrix using the sequential
// phonetic edit distance of string1 and string2, respectively
- set_initial(d, string1, string1_length, string2, string2_length);
+ set_initial(d, string1, string1_length, string2, string2_length, verbose);
debug("before:\n");
- print_matrix(d, string1, string1_length, string2, string2_length);
+ print_matrix(d, string1, string1_length, string2, string2_length, verbose);
// Then walk through the matrix and fill in each cell with the lowest-cost
// phonetic edit distance for that matrix cell.
// (Skipping i=0 and j=0 because set_initial filled in all cells where i
// or j are zero-valued)
@@ -115,11 +113,11 @@
min = replace;
}
d[(j * (string1_length+1)) + i] = min + cost;
debug("\n");
- print_matrix(d, string1, string1_length, string2, string2_length);
+ print_matrix(d, string1, string1_length, string2, string2_length, verbose);
}
}
// The final element in the `d` array is the value of the shortest path from
// the top-left to the bottom-right of the matrix.
@@ -139,11 +137,11 @@
// The second value is always the phonetic distance between the first
// phonemes of each string.
// Subsequent values are the cumulative phonetic distance between each
// phoneme within the same string.
// "aek" -> [0.0, 1.0, 1.61, 2.61]
-void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length) {
+void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
double distance_between_first_phonemes;
int i, j;
if (string1_length == 0 || string2_length == 0) {
@@ -152,10 +150,11 @@
distance_between_first_phonemes = 0.0;
} else {
distance_between_first_phonemes = phonetic_cost(string1[0], string2[0]);
}
+ d[0] = (double) 0.0;
// Set the first value of string1's sequential phonetic calculation (maps to
// cell x=1, y=0)
d[1] = distance_between_first_phonemes;
// And of string2 (maps to cell x=0, y=1)
if (string2_length > 0) {
@@ -174,10 +173,10 @@
d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]);
}
}
// A handy visualization for developers
-void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length) {
+void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
int i, j;
debug(" ");
for (i=0; i < string1_length; i++) {
debug("%8.d ", string1[i]);
}