Sha256: 4e6a6852c7909ee5e8a33c1470e6c662829190b038c14d5df0c25eb2f4f90ba1

Contents?: true

Size: 1.38 KB

Versions: 2

Compression:

Stored size: 1.38 KB

Contents

# frozen_string_literal: true

require_relative 'c_levenshtein'
# Using the Damerau version of the Levenshtein algorithm, with phonetic feature
# count used instead of a binary edit distance calculation
#
# This implementation is almost entirely taken from the damerau-levenshtein gem
# (https://github.com/GlobalNamesArchitecture/damerau-levenshtein/tree/master/ext/damerau_levenshtein).
# The implementation is modified based on "Using Phonologically Weighted
# Levenshtein Distances for the Prediction of Microscopic Intelligibility" by
# Lionel Fontan, Isabelle Ferrané, Jérôme Farinas, Julien Pinquier, Xavier
# Aumont, 2016
# https://hal.archives-ouvertes.fr/hal-01474904/document
module Phonetics
  # Phonetic edit distance between IPA-transcribed strings.
  #
  # The distance computation itself is delegated to internal_phonetic_distance,
  # which is not defined in this file (presumably it is supplied by the
  # PhoneticsLevenshteinCBinding extension mixed in below -- confirm against
  # the C source).
  module Levenshtein
    extend ::PhoneticsLevenshteinCBinding

    # Validates both inputs, converts each with Phonetics.as_utf_8_long and
    # hands them to internal_phonetic_distance.
    #
    # @param str1 [String] first IPA-transcribed string
    # @param str2 [String] second IPA-transcribed string
    # @param verbose [Boolean] forwarded unchanged to internal_phonetic_distance
    # @return the value returned by internal_phonetic_distance
    # @raise [ArgumentError] if either string contains a character that is not
    #   listed in Phonetics.phonemes
    def self.distance(str1, str2, verbose = false)
      ensure_is_phonetic!(str1, str2)
      internal_phonetic_distance(
        Phonetics.as_utf_8_long(str1),
        Phonetics.as_utf_8_long(str2),
        verbose
      )
    end

    # Checks every character of both strings against Phonetics.phonemes.
    #
    # @param str1 [String] first string to validate
    # @param str2 [String] second string to validate
    # @raise [ArgumentError] on the first character not found in
    #   Phonetics.phonemes
    def self.ensure_is_phonetic!(str1, str2)
      # Look the phoneme collection up once instead of once per character.
      phonemes = Phonetics.phonemes

      [str1, str2].each do |string|
        string.each_char do |char|
          next if phonemes.include?(char)

          # `name` is this module's own name. Inside a singleton method
          # `self.class.name` would evaluate to "Module", which made the
          # message misleading.
          msg = "#{char.inspect} is not a character in the International Phonetic Alphabet. " \
                "#{name} only works with IPA-transcribed strings"
          raise ArgumentError, msg
        end
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
phonetics-1.5.4 lib/phonetics/levenshtein.rb
phonetics-1.5.3 lib/phonetics/levenshtein.rb