Sha256: 7192f9668abbf3c2f6c63530973360d1e09a71a3ab97c0e5f13edc8dff8a691d
Contents?: true
Size: 1.94 KB
Versions: 1
Compression:
Stored size: 1.94 KB
Contents
# encoding: UTF-8

# Builds a coarse phonetic key for a word so that similar-sounding spellings
# collapse to the same string.
class Phonetizer
  # Leading letter groups and what each one is rewritten to. Only the very
  # start of the word is examined and at most one rule can apply (no prefix
  # in this table is itself the start of another).
  LEADING_REPLACEMENTS = {
    'AE' => 'E',
    'CN' => 'N',
    'CT' => 'T',
    'CZ' => 'C',
    'DJ' => 'J',
    'EA' => 'E',
    'EU' => 'U',
    'GN' => 'N',
    'KN' => 'N',
    'MC' => 'MAC',
    'MN' => 'N',
    'OE' => 'E',
    'QU' => 'Q',
    'PS' => 'S',
    'PT' => 'T',
    'TS' => 'S',
    'WR' => 'R',
    'X'  => 'Z'
  }.freeze

  # Returns the phonetic key for +a_word+.
  #
  # a_word           - the word to phonetize; anything that cannot be stripped
  #                    (e.g. nil) is treated as an empty word.
  # normalize_ending - when true, and the key is longer than 4 characters,
  #                    the key's ending is also passed through
  #                    Phonetizer.normalize_ending.
  #
  # Returns a String; '' when the input is blank or normalizes to nothing.
  def self.near_match(a_word, normalize_ending = false)
    # Best-effort: an input without a usable #strip (nil, etc.) becomes ''.
    a_word = begin
      a_word.strip
    rescue StandardError
      ''
    end
    return '' if a_word == ''

    # Normalizer lives elsewhere in the project; the rules below only use
    # upper-case ASCII letters, so it presumably upper-cases and
    # transliterates -- TODO confirm against Normalizer.
    a_word = Normalizer.normalize(a_word)
    # Guard: without this an empty normalized word would raise below.
    return '' if a_word.nil? || a_word.empty?

    # start_with? anchors at the real beginning of the string. A /^.../
    # pattern would also match at the start of a later line while the
    # replacement still cuts characters off the front of the whole string.
    prefix, replacement = LEADING_REPLACEMENTS.find { |pre, _| a_word.start_with?(pre) }
    a_word = replacement + a_word[prefix.size..-1] if prefix

    # The first character is kept verbatim; the remaining rules apply to the
    # rest of the word only.
    first_char = a_word[0, 1]
    rest_chars = a_word[1..-1]
                 .gsub('AE', 'I')
                 .gsub('IA', 'A')
                 .gsub('OE', 'I')
                 .gsub('OI', 'A')
                 .gsub('SC', 'S')
                 .gsub('H', '')
                 .tr('EOUYKZ', 'IAIICS')

    # Collapse runs of the same character into one.
    a_word = (first_char + rest_chars).squeeze

    if normalize_ending && a_word.size > 4
      a_word = self.normalize_ending(a_word)
    end
    a_word
  end

  # Maps the variant endings -IS, -IM and -AS to -A. Because near_match has
  # already folded E/U/Y into I and O into A, this also covers -us, -ys, -es
  # (as -IS), -um (as -IM) and -os (as -AS).
  #
  # The argument is never modified, so frozen strings are accepted.
  #
  # This method is intentionally left public: the original `protected`
  # keyword in front of it never applied to `def self.` methods.
  #
  # a_word - an already phonetized String.
  #
  # Returns a new String with the ending normalized.
  def self.normalize_ending(a_word)
    a_word.gsub(/IS$/, 'A').gsub(/IM$/, 'A').gsub(/AS$/, 'A')
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
dimus-taxamatch_rb-0.1.1 | lib/taxamatch_rb/phonetizer.rb |