lib/mongoid_fulltext.rb in mongoid_fulltext-0.4.5 vs lib/mongoid_fulltext.rb in mongoid_fulltext-0.5.0
- old
+ new
@@ -151,11 +151,10 @@
}
end
all_scores.concat(scores)
end
all_scores.sort!{ |document1, document2| -document1[:score] <=> -document2[:score] }
-
instantiate_mapreduce_results(all_scores[0..max_results-1], { :return_scores => return_scores })
end
def instantiate_mapreduce_result(result)
result[:clazz].constantize.find(:first, :conditions => {'_id' => result[:id]})
@@ -187,36 +186,33 @@
step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
else
step_size = 1
end
- # array of ngrams
+ # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the input string
ngram_ary = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i|
if i == 0 or (config[:apply_prefix_scoring_to_all_words] and \
config[:word_separators].has_key?(filtered_str[i-1].chr))
score = Math.sqrt(1 + 1.0/filtered_str.length)
else
score = Math.sqrt(2.0/filtered_str.length)
end
- [filtered_str[i..i+config[:ngram_width]-1], score]
+ {:ngram => filtered_str[i..i+config[:ngram_width]-1], :score => score}
end
+
+ # If an ngram appears multiple times in the query string, keep the max score
+ ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
if (config[:index_full_words])
filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
if word.length >= config[:ngram_width]
- ngram_ary << [ word, 1 ]
+ ngram_ary << {:ngram => word, :score => 1}
end
end
end
-
- ngram_hash = {}
-
- # deduplicate, and keep the highest score
- ngram_ary.each do |ngram, score, position|
- ngram_hash[ngram] = [ngram_hash[ngram] || 0, score].max
- end
-
- ngram_hash
+
+ # If a string appears both as a full word and as an ngram, keep the sum of its scores
+ Hash[ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }]
end
def remove_from_ngram_index
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
coll = collection.db.collection(index_name)