lib/fuzzy_match.rb in fuzzy_match-1.3.3 vs lib/fuzzy_match.rb in fuzzy_match-1.4.0
- old
+ new
@@ -1,10 +1,5 @@
-require 'active_support'
-require 'active_support/version'
-if ::ActiveSupport::VERSION::MAJOR >= 3
- require 'active_support/core_ext'
-end
require 'to_regexp'
require 'fuzzy_match/rule'
require 'fuzzy_match/rule/normalizer'
require 'fuzzy_match/rule/stop_word'
@@ -17,15 +12,15 @@
# See the README for more information.
class FuzzyMatch
class << self
def engine
- @@engine ||= :pure_ruby
+ @engine
end
def engine=(alt_engine)
- @@engine = alt_engine
+ @engine = alt_engine
end
def score_class
case engine
when :pure_ruby
@@ -45,10 +40,12 @@
:must_match_grouping => false,
:must_match_at_least_one_word => false,
:gather_last_result => false,
:find_all => false
}
+
+ self.engine = DEFAULT_ENGINE
attr_reader :haystack
attr_reader :groupings
attr_reader :identities
attr_reader :normalizers
@@ -69,11 +66,11 @@
# * :<tt>must_match_grouping</tt> - don't return a match unless the needle fits into one of the groupings you specified
# * :<tt>must_match_at_least_one_word</tt> - don't return a match unless the needle shares at least one word with the match
# * :<tt>first_grouping_decides</tt> - force records into the first grouping they match, rather than choosing a grouping that will give them a higher score
# * :<tt>gather_last_result</tt> - enable <tt>last_result</tt>
def initialize(competitors, options_and_rules = {})
- options_and_rules = options_and_rules.symbolize_keys
+ options_and_rules = options_and_rules.dup
# rules
self.groupings = options_and_rules.delete(:groupings) || options_and_rules.delete(:blockings) || []
self.identities = options_and_rules.delete(:identities) || []
self.normalizers = options_and_rules.delete(:normalizers) || options_and_rules.delete(:tighteners) || []
@@ -85,11 +82,11 @@
options_and_rules[:first_grouping_decides] = deprecated
end
if deprecated = options_and_rules.delete(:must_match_blocking)
options_and_rules[:must_match_grouping] = deprecated
end
- @default_options = options_and_rules.reverse_merge(DEFAULT_OPTIONS).freeze
+ @default_options = DEFAULT_OPTIONS.merge(options_and_rules).freeze
# do this last
self.haystack = competitors
end
@@ -116,16 +113,16 @@
def last_result
@last_result || raise(::RuntimeError, "[fuzzy_match] You can't access the last result until you've run a find with :gather_last_result => true")
end
def find_all(needle, options = {})
- options = options.symbolize_keys.merge(:find_all => true)
+ options = options.merge(:find_all => true)
find needle, options
end
def find(needle, options = {})
- options = options.symbolize_keys.reverse_merge default_options
+ options = default_options.merge options
gather_last_result = options[:gather_last_result]
is_find_all = options[:find_all]
first_grouping_decides = options[:first_grouping_decides]
must_match_grouping = options[:must_match_grouping]
@@ -191,11 +188,13 @@
end
if groupings.any?
joint = passed_word_requirement.select do |straw|
if first_grouping_decides
- groupings.detect { |grouping| grouping.match? needle }.try :join?, needle, straw
+ if first_grouping = groupings.detect { |grouping| grouping.match? needle }
+ first_grouping.join? needle, straw
+ end
else
groupings.any? { |grouping| grouping.join? needle, straw }
end
end
if gather_last_result
@@ -235,25 +234,25 @@
end
end
if gather_last_result
+ # Array#[start, length] takes a LENGTH: 10 (not 9) is what shows the "first 10" the labels below promise.
last_result.timeline << <<-EOS
Since there were identities, the competition was reduced to records that might be identical to the needle (in other words, are not certainly different)
-\Identities (first 3): #{identities[0,3].map(&:inspect).join(', ')}
-\tPassed (first 3): #{possibly_identical[0,3].map(&:render).map(&:inspect).join(', ')}
-\tFailed (first 3): #{(joint-possibly_identical)[0,3].map(&:render).map(&:inspect).join(', ')}
+\tIdentities (first 10 of #{identities.length}): #{identities[0,10].map(&:inspect).join(', ')}
+\tPassed (first 10 of #{possibly_identical.length}): #{possibly_identical[0,10].map(&:render).map(&:inspect).join(', ')}
+\tFailed (first 10 of #{(joint-possibly_identical).length}): #{(joint-possibly_identical)[0,10].map(&:render).map(&:inspect).join(', ')}
EOS
end
else
possibly_identical = joint.dup
end
-
+
similarities = possibly_identical.map { |straw| needle.similarity straw }.sort.reverse
if gather_last_result
+ # Array#[start, length] takes a LENGTH: 10 (not 9) is what shows the "first 10" the label below promises.
- last_result.timeline << <<-EOS
+ last_result.timeline << <<-EOS
The competition was sorted in order of similarity to the needle.
-\tSimilar (first 3): #{(similarities)[0,3].map(&:wrapper2).map(&:render).map(&:inspect).join(', ')}
+\tSimilar (first 10 of #{similarities.length}): #{similarities[0,10].map { |s| "#{s.wrapper2.render.inspect} (#{[s.best_score.dices_coefficient_similar, s.best_score.levenshtein_similar].map { |v| '%0.5f' % v }.join('/')})" }.join(', ')}
EOS
end
if is_find_all
return similarities.map { |similarity| similarity.wrapper2.record }
@@ -270,11 +269,14 @@
last_result.timeline << <<-EOS
A winner was determined because the Dice's Coefficient similarity (#{best_similarity.best_score.dices_coefficient_similar}) is greater than zero or because it shared a word with the needle.
EOS
end
elsif gather_last_result
- last_result.timeline << <<-EOS
-No winner assigned because the score of the best similarity (#{best_similarity.try(:wrapper2).try(:record).try(:inspect)}) was zero and it didn't match any words with the needle (#{needle.inspect}).
+ best_similarity_record = if best_similarity and best_similarity.wrapper2
+ best_similarity.wrapper2.record
+ end
+ last_result.timeline << <<-EOS
+No winner assigned because the score of the best similarity (#{best_similarity_record.inspect}) was zero and it didn't match any words with the needle (#{needle.inspect}).
EOS
end
winner
end