lib/mongoid_fulltext.rb in mongoid_fulltext-0.5.2 vs lib/mongoid_fulltext.rb in mongoid_fulltext-0.5.3

- old
+ new

@@ -27,10 +27,11 @@ :word_separators => ' ', :ngram_width => 3, :max_ngrams_to_search => 6, :apply_prefix_scoring_to_all_words => true, :index_full_words => true, + :index_short_prefixes => false, :max_candidate_set_size => 1000, :remove_accents => true, :stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if', 'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him', 'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and', @@ -216,22 +217,35 @@ {:ngram => filtered_str[i..i+config[:ngram_width]-1], :score => score} end # If an ngram appears multiple times in the query string, keep the max score ngram_array = ngram_array.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} } - + + # Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1) + if config[:index_short_prefixes] + prefixes_seen = {} + filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word| + next if word.length < config[:ngram_width]-1 + prefix = word[0...config[:ngram_width]-1] + if prefixes_seen[prefix].nil? and (config[:stop_words][word].nil? or word == filtered_str) + ngram_array << {:ngram => prefix, :score => 1} + prefixes_seen[prefix] = true + end + end + end + # Add records to the array of ngrams for each full word in the string that isn't a stop word - if (config[:index_full_words]) + if config[:index_full_words] full_words_seen = {} filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word| - if word.length > 1 and full_words_seen[word].nil? and config[:stop_words][word].nil? + if word.length > 1 and full_words_seen[word].nil? and (config[:stop_words][word].nil? 
or word == filtered_str) ngram_array << {:ngram => word, :score => 1} full_words_seen[word] = true end end end - # If an ngram appears as a full word and an ngram, keep the sum of the two scores + # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of all of its scores Hash[ngram_array.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }] end def remove_from_ngram_index self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|