lib/mongoid_fulltext.rb in mongoid_fulltext-0.4.3 vs lib/mongoid_fulltext.rb in mongoid_fulltext-0.4.4
- old
+ new
@@ -1,6 +1,7 @@
require 'mongoid_indexes'
+require 'unicode_utils'
module Mongoid::FullTextSearch
extend ActiveSupport::Concern
included do
@@ -25,11 +26,12 @@
:word_separators => ' ',
:ngram_width => 3,
:max_ngrams_to_search => 6,
:apply_prefix_scoring_to_all_words => true,
:index_full_words => true,
- :max_candidate_set_size => 1000
+ :max_candidate_set_size => 1000,
+ :remove_accents => true
}
config.update(options)
args = [:to_s] if args.empty?
@@ -41,10 +43,11 @@
before_save :update_ngram_index
before_destroy :remove_from_ngram_index
end
# Calls fulltext_search_ensure_indexes once for every entry in
# mongoid_fulltext_config. Returns early (nil) when mongoid_fulltext_config
# is nil or false, so no indexes are touched in that case.
def create_fulltext_indexes
+ return unless self.mongoid_fulltext_config
# each_pair yields (index_name, fulltext_config) for every configured index.
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
fulltext_search_ensure_indexes(index_name, fulltext_config)
end
end
@@ -167,10 +170,19 @@
end
# returns an [ngram, score] [ngram, position] pair
def all_ngrams(str, config, bound_number_returned = true)
return {} if str.nil? or str.length < config[:ngram_width]
- filtered_str = str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
+
+ filtered_str = String.new(str)
+ if config[:remove_accents]
+ if str.encoding.name == "ASCII-8BIT"
+ filtered_str = CGI.unescape(filtered_str)
+ end
+ filtered_str = UnicodeUtils.nfkd(filtered_str).gsub(/[^\x00-\x7F]/,'')
+ end
+
+ filtered_str = filtered_str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
if bound_number_returned
step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
else
step_size = 1