lib/fuzzy_match.rb in fuzzy_match-1.3.3 vs lib/fuzzy_match.rb in fuzzy_match-1.4.0

- old
+ new

@@ -1,10 +1,5 @@ -require 'active_support' -require 'active_support/version' -if ::ActiveSupport::VERSION::MAJOR >= 3 - require 'active_support/core_ext' -end require 'to_regexp' require 'fuzzy_match/rule' require 'fuzzy_match/rule/normalizer' require 'fuzzy_match/rule/stop_word' @@ -17,15 +12,15 @@ # See the README for more information. class FuzzyMatch class << self def engine - @@engine ||= :pure_ruby + @engine end def engine=(alt_engine) - @@engine = alt_engine + @engine = alt_engine end def score_class case engine when :pure_ruby @@ -45,10 +40,12 @@ :must_match_grouping => false, :must_match_at_least_one_word => false, :gather_last_result => false, :find_all => false } + + self.engine = DEFAULT_ENGINE attr_reader :haystack attr_reader :groupings attr_reader :identities attr_reader :normalizers @@ -69,11 +66,11 @@ # * :<tt>must_match_grouping</tt> - don't return a match unless the needle fits into one of the groupings you specified # * :<tt>must_match_at_least_one_word</tt> - don't return a match unless the needle shares at least one word with the match # * :<tt>first_grouping_decides</tt> - force records into the first grouping they match, rather than choosing a grouping that will give them a higher score # * :<tt>gather_last_result</tt> - enable <tt>last_result</tt> def initialize(competitors, options_and_rules = {}) - options_and_rules = options_and_rules.symbolize_keys + options_and_rules = options_and_rules.dup # rules self.groupings = options_and_rules.delete(:groupings) || options_and_rules.delete(:blockings) || [] self.identities = options_and_rules.delete(:identities) || [] self.normalizers = options_and_rules.delete(:normalizers) || options_and_rules.delete(:tighteners) || [] @@ -85,11 +82,11 @@ options_and_rules[:first_grouping_decides] = deprecated end if deprecated = options_and_rules.delete(:must_match_blocking) options_and_rules[:must_match_grouping] = deprecated end - @default_options = options_and_rules.reverse_merge(DEFAULT_OPTIONS).freeze + @default_options = DEFAULT_OPTIONS.merge(options_and_rules).freeze # do this last self.haystack = competitors end @@ -116,16 +113,16 @@ def last_result @last_result || raise(::RuntimeError, "[fuzzy_match] You can't access the last result until you've run a find with :gather_last_result => true") end def find_all(needle, options = {}) - options = options.symbolize_keys.merge(:find_all => true) + options = options.merge(:find_all => true) find needle, options end def find(needle, options = {}) - options = options.symbolize_keys.reverse_merge default_options + options = default_options.merge options gather_last_result = options[:gather_last_result] is_find_all = options[:find_all] first_grouping_decides = options[:first_grouping_decides] must_match_grouping = options[:must_match_grouping] @@ -191,11 +188,13 @@ end if groupings.any? joint = passed_word_requirement.select do |straw| if first_grouping_decides - groupings.detect { |grouping| grouping.match? needle }.try :join?, needle, straw + if first_grouping = groupings.detect { |grouping| grouping.match? needle } + first_grouping.join? needle, straw + end else groupings.any? { |grouping| grouping.join? needle, straw } end end if gather_last_result @@ -235,25 +234,25 @@ end end if gather_last_result last_result.timeline << <<-EOS Since there were identities, the competition was reduced to records that might be identical to the needle (in other words, are not certainly different) -\Identities (first 3): #{identities[0,3].map(&:inspect).join(', ')} -\tPassed (first 3): #{possibly_identical[0,3].map(&:render).map(&:inspect).join(', ')} -\tFailed (first 3): #{(joint-possibly_identical)[0,3].map(&:render).map(&:inspect).join(', ')} +\tIdentities (first 10 of #{identities.length}): #{identities[0,9].map(&:inspect).join(', ')} +\tPassed (first 10 of #{possibly_identical.length}): #{possibly_identical[0,9].map(&:render).map(&:inspect).join(', ')} +\tFailed (first 10 of #{(joint-possibly_identical).length}): #{(joint-possibly_identical)[0,9].map(&:render).map(&:inspect).join(', ')} EOS end else possibly_identical = joint.dup end - + similarities = possibly_identical.map { |straw| needle.similarity straw }.sort.reverse if gather_last_result - last_result.timeline << <<-EOS + last_result.timeline << <<-EOS The competition was sorted in order of similarity to the needle. -\tSimilar (first 3): #{(similarities)[0,3].map(&:wrapper2).map(&:render).map(&:inspect).join(', ')} +\tSimilar (first 10 of #{similarities.length}): #{similarities[0,9].map { |s| "#{s.wrapper2.render.inspect} (#{[s.best_score.dices_coefficient_similar, s.best_score.levenshtein_similar].map { |v| '%0.5f' % v }.join('/')})" }.join(', ')} EOS end if is_find_all return similarities.map { |similarity| similarity.wrapper2.record } @@ -270,11 +269,14 @@ last_result.timeline << <<-EOS A winner was determined because the Dice's Coefficient similarity (#{best_similarity.best_score.dices_coefficient_similar}) is greater than zero or because it shared a word with the needle. EOS end elsif gather_last_result - last_result.timeline << <<-EOS -No winner assigned because the score of the best similarity (#{best_similarity.try(:wrapper2).try(:record).try(:inspect)}) was zero and it didn't match any words with the needle (#{needle.inspect}). + best_similarity_record = if best_similarity and best_similarity.wrapper2 + best_similarity.wrapper2.record + end + last_result.timeline << <<-EOS +No winner assigned because the score of the best similarity (#{best_similarity_record.inspect}) was zero and it didn't match any words with the needle (#{needle.inspect}). EOS end winner end