lib/fuzzily/trigram.rb in fuzzily-0.0.1 vs lib/fuzzily/trigram.rb in fuzzily-0.0.2

- old
+ new

@@ -1,25 +1,23 @@
-require 'iconv'
+require 'active_support/core_ext/string/multibyte'

 module Fuzzily
   module String
     def trigrams
-      normalized_words.map do |word|
-        (0..(word.length - 3)).map { |index| word[index,3] }
-      end.flatten.uniq
+      normalized = self.normalize
+      (0..(normalized.length - 3)).map { |index| normalized[index,3] }.uniq
     end

-    private
+    protected

     # Remove accents, downcase, replace spaces and word start with '*',
     # return list of normalized words
-    def normalized_words
-      self.split(/\s+/).map { |word|
-        Iconv.iconv('ascii//translit//ignore', 'utf-8', word).first.downcase.gsub(/\W/,'')
-      }.
-      delete_if(&:empty?).
-      map { |word|
-        "**#{word}"
-      }
+    def normalize
+      # Iconv.iconv('ascii//translit//ignore', 'utf-8', self).first.
+      ActiveSupport::Multibyte::Chars.new(self).
+        mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').downcase.to_s.
+        gsub(/\W/,' ').
+        gsub(/\s+/,'*').
+        gsub(/^/,'**')
     end
   end
 end