lib/phonetics/levenshtein.rb in phonetics-1.5.4 vs lib/phonetics/levenshtein.rb in phonetics-1.8.0

- old
+ new

@@ -1,38 +1,29 @@
 # frozen_string_literal: true

+require_relative '../phonetics'
 require_relative 'c_levenshtein'
+
 # Using the Damerau version of the Levenshtein algorithm, with phonetic feature
 # count used instead of a binary edit distance calculation
 #
-# This implementation is almost entirely taken from the damerau-levenshtein gem
+# This implementation was dually inspired by the damerau-levenshtein gem
 # (https://github.com/GlobalNamesArchitecture/damerau-levenshtein/tree/master/ext/damerau_levenshtein).
-# The implementation is modified based on "Using Phonologically Weighted
-# Levenshtein Distances for the Prediction of Microscopic Intelligibility" by
-# Lionel Fontan, Isabelle Ferrané, Jérôme Farinas, Julien Pinquier, Xavier
-# Aumont, 2016
+# and "Using Phonologically Weighted Levenshtein Distances for the Prediction
+# of Microscopic Intelligibility" by Lionel Fontan, Isabelle Ferrané, Jérôme
+# Farinas, Julien Pinquier, Xavier Aumont, 2016
 # https://hal.archives-ouvertes.fr/hal-01474904/document
 module Phonetics
   module Levenshtein
     extend ::PhoneticsLevenshteinCBinding

-    def self.distance(str1, str2, verbose = false)
-      ensure_is_phonetic!(str1, str2)
-      internal_phonetic_distance(
-        Phonetics.as_utf_8_long(str1),
-        Phonetics.as_utf_8_long(str2),
-        verbose
-      )
+    def inspect_bytes(str)
+      puts "Rubyland str: #{str.inspect}"
+      puts "Rubyland bytes: #{str.bytes.inspect}"
+      testing_codepoints(str)
     end

-    def self.ensure_is_phonetic!(str1, str2)
-      [str1, str2].each do |string|
-        string.chars.each do |char|
-          unless Phonetics.phonemes.include?(char)
-            msg = "#{char.inspect} is not a character in the International Phonetic Alphabet. #{self.class.name} only works with IPA-transcribed strings"
-            raise ArgumentError, msg
-          end
-        end
-      end
+    def self.distance(str1, str2, verbose = false)
+      internal_phonetic_distance(str1, str2, verbose)
     end
   end
 end