lib/fuzzily/trigram.rb in fuzzily-0.0.1 vs lib/fuzzily/trigram.rb in fuzzily-0.0.2
- old
+ new
@@ -1,25 +1,23 @@
-require 'iconv'
+require 'active_support/core_ext/string/multibyte'
module Fuzzily
# Trigram helpers written against a string-like receiver (`self` is split /
# gsub'ed / wrapped as text below).
# NOTE(review): presumably mixed into ::String elsewhere — confirm at the
# include site; it is not visible in this diff.
module String
# Returns the unique 3-character substrings (trigrams) of the normalized
# receiver, as an Array of Strings.
# - Removed lines: trigrams are taken per normalized word, then flattened.
# - Added lines: trigrams are taken over the single normalized string, so
#   they also span the '*' separators between words.
# In both versions the index range is empty (result: no trigrams) when the
# text being sliced is shorter than 3 characters.
def trigrams
- normalized_words.map do |word|
- (0..(word.length - 3)).map { |index| word[index,3] }
- end.flatten.uniq
+ normalized = self.normalize
+ (0..(normalized.length - 3)).map { |index| normalized[index,3] }.uniq
end
# The helper below changes visibility from private to protected.
# NOTE(review): presumably because `trigrams` now calls it with an explicit
# `self.` receiver — confirm against the Ruby versions this gem supports.
- private
+ protected
# Normalize the receiver for trigram extraction: remove accents / non-ASCII
# characters, downcase, and mark word starts with '*'.
#
# Removed version (`normalized_words`) returns an Array of Strings:
#   1. split the receiver on whitespace;
#   2. convert each word from UTF-8 to ASCII via Iconv (translit/ignore —
#      presumably transliterating accented characters; see Iconv docs);
#   3. downcase and delete every non-word character (\W);
#   4. drop words that ended up empty;
#   5. prefix each remaining word with '**'.
#
# Added version (`normalize`) returns a single String:
#   1. wrap the receiver in ActiveSupport multibyte chars and call
#      `normalize(:kd)` (presumably Unicode KD decomposition, so accents
#      become separate code points — confirm in the ActiveSupport docs);
#   2. delete every character outside \x00-\x7F, then downcase;
#   3. replace every non-word character (\W) with a space;
#   4. collapse each whitespace run into one '*' (word separator);
#   5. insert '**' at every `^` match. Newlines are \W and were already
#      turned into separators in steps 3-4, so `^` matches only once, at the
#      start of the string.
# A leading separator in the input therefore yields '***' at the start, and
# a trailing one leaves a trailing '*'.
- def normalized_words
- self.split(/\s+/).map { |word|
- Iconv.iconv('ascii//translit//ignore', 'utf-8', word).first.downcase.gsub(/\W/,'')
- }.
- delete_if(&:empty?).
- map { |word|
- "**#{word}"
- }
+ def normalize
+ # Iconv.iconv('ascii//translit//ignore', 'utf-8', self).first.
+ ActiveSupport::Multibyte::Chars.new(self).
+ mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').downcase.to_s.
+ gsub(/\W/,' ').
+ gsub(/\s+/,'*').
+ gsub(/^/,'**')
end
end
end