Sha256: 4e6a6852c7909ee5e8a33c1470e6c662829190b038c14d5df0c25eb2f4f90ba1
Contents?: true
Size: 1.38 KB
Versions: 2
Compression:
Stored size: 1.38 KB
Contents
# frozen_string_literal: true

require_relative 'c_levenshtein'

# Using the Damerau version of the Levenshtein algorithm, with phonetic feature
# count used instead of a binary edit distance calculation
#
# This implementation is almost entirely taken from the damerau-levenshtein gem
# (https://github.com/GlobalNamesArchitecture/damerau-levenshtein/tree/master/ext/damerau_levenshtein).
# The implementation is modified based on "Using Phonologically Weighted
# Levenshtein Distances for the Prediction of Microscopic Intelligibility" by
# Lionel Fontan, Isabelle Ferrané, Jérôme Farinas, Julien Pinquier, Xavier
# Aumont, 2016
# https://hal.archives-ouvertes.fr/hal-01474904/document
module Phonetics
  # Phonetically weighted edit distance between two IPA-transcribed strings.
  module Levenshtein
    # NOTE(review): `internal_phonetic_distance` is not defined in this file;
    # presumably it is supplied by this binding module — confirm.
    extend ::PhoneticsLevenshteinCBinding

    # Validates both strings and delegates the distance computation to
    # `internal_phonetic_distance`.
    #
    # @param str1 [String] first IPA-transcribed string
    # @param str2 [String] second IPA-transcribed string
    # @param verbose [Boolean] forwarded unchanged to
    #   `internal_phonetic_distance`
    # @return whatever `internal_phonetic_distance` returns
    # @raise [ArgumentError] if either string contains a character that is
    #   not listed in `Phonetics.phonemes`
    def self.distance(str1, str2, verbose = false)
      ensure_is_phonetic!(str1, str2)
      internal_phonetic_distance(
        Phonetics.as_utf_8_long(str1),
        Phonetics.as_utf_8_long(str2),
        verbose
      )
    end

    # Raises on the first character of either string that is not included in
    # `Phonetics.phonemes`.
    #
    # @param str1 [String]
    # @param str2 [String]
    # @raise [ArgumentError] naming the offending character
    def self.ensure_is_phonetic!(str1, str2)
      [str1, str2].each do |string|
        string.each_char do |char|
          next if Phonetics.phonemes.include?(char)

          # `name` (not `self.class.name`): inside a module singleton method
          # `self` is the module itself, so `self.class.name` would report
          # "Module" instead of "Phonetics::Levenshtein".
          msg = "#{char.inspect} is not a character in the International Phonetic Alphabet. " \
                "#{name} only works with IPA-transcribed strings"
          raise ArgumentError, msg
        end
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
phonetics-1.5.4 | lib/phonetics/levenshtein.rb |
phonetics-1.5.3 | lib/phonetics/levenshtein.rb |