Sha256: 7192f9668abbf3c2f6c63530973360d1e09a71a3ab97c0e5f13edc8dff8a691d

Contents?: true

Size: 1.94 KB

Versions: 1

Compression:

Stored size: 1.94 KB

Contents

# encoding: UTF-8

# Builds a coarse phonetic key for a word so that similarly sounding
# spellings collapse to the same string.
class Phonetizer

  # Leading letter pairs and the text that replaces them before the phonetic
  # rules run. Keys are upper case because they are compared against the
  # output of Normalizer.normalize.
  # NOTE(review): Normalizer is defined outside this file; it is assumed to
  # return an upper-case String -- confirm.
  LEADING_PAIRS = {
    'AE' => 'E', 'CN' => 'N', 'CT' => 'T', 'CZ' => 'C', 'DJ' => 'J',
    'EA' => 'E', 'EU' => 'U', 'GN' => 'N', 'KN' => 'N', 'MC' => 'MAC',
    'MN' => 'N', 'OE' => 'E', 'QU' => 'Q', 'PS' => 'S', 'PT' => 'T',
    'TS' => 'S', 'WR' => 'R'
  }.freeze

  # Substitutions applied, in this order, to everything after the first
  # character. Order matters: an earlier rule can create or destroy the
  # letters a later rule looks for.
  INNER_REPLACEMENTS = [
    ['AE', 'I'], ['IA', 'A'], ['OE', 'I'], ['OI', 'A'], ['SC', 'S']
  ].freeze

  # Returns the phonetic key of +a_word+.
  #
  # a_word           - the word to encode; nil or any value that cannot be
  #                    stripped yields ''.
  # normalize_ending - when true and the key is longer than 4 characters,
  #                    the ending is collapsed via Phonetizer.normalize_ending.
  #
  # Returns a String; '' when the input is blank or normalizes to nothing.
  def self.near_match(a_word, normalize_ending = false)
    word = begin
      a_word.strip
    rescue StandardError
      # Deliberate best effort: nil, non-strings and unstrippable values
      # have no phonetic key.
      ''
    end
    return '' if word.empty?

    word = Normalizer.normalize(word).to_s
    # Normalization may leave nothing to encode; without this guard the
    # slicing below would operate on nil.
    return '' if word.empty?

    # Rewrite the leading letters. Only the true start of the string is
    # inspected, so embedded line breaks cannot trigger a prefix rule.
    replacement = LEADING_PAIRS[word[0, 2]]
    if replacement
      word = replacement + word[2..-1]
    elsif word[0, 1] == 'X'
      word = 'Z' + word[1..-1]
    end

    # The first character is kept verbatim; only the remainder goes through
    # the vowel/consonant folding.
    first_char = word[0, 1]
    rest_chars = word[1..-1]
    INNER_REPLACEMENTS.each do |from, to|
      rest_chars = rest_chars.gsub(from, to)
    end
    rest_chars = rest_chars.delete('H').tr('EOUYKZ', 'IAIICS')

    # Collapse runs of the same character (e.g. "II" -> "I").
    word = (first_char + rest_chars).squeeze

    # The explicit receiver is required: the parameter +normalize_ending+
    # shadows the method of the same name inside this method.
    word = self.normalize_ending(word) if normalize_ending && word.size > 4
    word
  end

  # Replaces a trailing -IS, -IM or -AS with -A.
  #
  # Per the original author's note, after the folding done in near_match
  # -IS also covers -us/-ys/-es, -IM covers -um and -AS covers -os.
  #
  # This method is public: the bare `protected` keyword that used to precede
  # it never applied to methods defined with `def self.`. It is left public
  # so existing callers keep working.
  #
  # a_word - the String to inspect; it is not modified.
  #
  # Returns a new String.
  def self.normalize_ending(a_word)
    a_word.sub(/(?:IS|IM|AS)\z/, 'A')
  end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
dimus-taxamatch_rb-0.1.1 lib/taxamatch_rb/phonetizer.rb