lib/fuzzy_match.rb in fuzzy_match-1.3.1 vs lib/fuzzy_match.rb in fuzzy_match-1.3.2
- old
+ new
@@ -3,14 +3,15 @@
if ::ActiveSupport::VERSION::MAJOR >= 3
require 'active_support/core_ext'
end
require 'to_regexp'
-require 'fuzzy_match/normalizer'
-require 'fuzzy_match/stop_word'
-require 'fuzzy_match/blocking'
-require 'fuzzy_match/identity'
+require 'fuzzy_match/rule'
+require 'fuzzy_match/rule/normalizer'
+require 'fuzzy_match/rule/stop_word'
+require 'fuzzy_match/rule/grouping'
+require 'fuzzy_match/rule/identity'
require 'fuzzy_match/result'
require 'fuzzy_match/wrapper'
require 'fuzzy_match/similarity'
require 'fuzzy_match/score'
@@ -42,19 +43,19 @@
end
# NOTE(review): presumably names the similarity backend used when none is chosen; its consumer is not visible in this excerpt.
DEFAULT_ENGINE = :pure_ruby
# Fallback values for the per-search options. #initialize merges the caller's options over these
# (caller's values win), so every switch listed here is off unless explicitly enabled.
DEFAULT_OPTIONS = {
- :first_blocking_decides => false,
- :must_match_blocking => false,
+ :first_grouping_decides => false,
+ :must_match_grouping => false,
:must_match_at_least_one_word => false,
:gather_last_result => false,
:find_all => false
}
# The wrapped records being searched (see #haystack=).
attr_reader :haystack
# Rule collections; each one is populated through its own setter.
- attr_reader :blockings
+ attr_reader :groupings
attr_reader :identities
attr_reader :normalizers
attr_reader :stop_words
# How to interpret each haystack record (taken from the :read or :haystack_reader option in #initialize).
attr_reader :read
# Frozen hash of the options given at initialization, backed by DEFAULT_OPTIONS.
attr_reader :default_options
@@ -62,50 +63,56 @@
# haystack - a bunch of records that will compete to see who best matches the needle
#
# Rules (can only be specified at initialization or by using a setter)
# * :<tt>normalizers</tt> - regexps (see README)
# * :<tt>identities</tt> - regexps
- # * :<tt>blockings</tt> - regexps
+ # * :<tt>groupings</tt> - regexps
# * :<tt>stop_words</tt> - regexps
#
# Options (can be specified at initialization or when calling #find)
# * :<tt>read</tt> - how to interpret each record in the 'haystack', either a Proc or a symbol
- # * :<tt>must_match_blocking</tt> - don't return a match unless the needle fits into one of the blockings you specified
+ # * :<tt>must_match_grouping</tt> - don't return a match unless the needle fits into one of the groupings you specified
# * :<tt>must_match_at_least_one_word</tt> - don't return a match unless the needle shares at least one word with the match
- # * :<tt>first_blocking_decides</tt> - force records into the first blocking they match, rather than choosing a blocking that will give them a higher score
+ # * :<tt>first_grouping_decides</tt> - force records into the first grouping they match, rather than choosing a grouping that will give them a higher score
# * :<tt>gather_last_result</tt> - enable <tt>last_result</tt>
# * :<tt>find_all</tt> - make #find return an array of records instead of a single winner
#
# Alternate key names also accepted here: :blockings (for :groupings), :tighteners (for :normalizers),
# :haystack_reader (for :read), and the deprecated :first_blocking_decides / :must_match_blocking.
def initialize(competitors, options_and_rules = {})
# work on a symbol-keyed copy so the deletes below never touch the caller's hash
options_and_rules = options_and_rules.symbolize_keys
# rules
# each rule key is removed from the hash so that only options remain afterwards
- self.blockings = options_and_rules.delete(:blockings) || []
+ self.groupings = options_and_rules.delete(:groupings) || options_and_rules.delete(:blockings) || []
self.identities = options_and_rules.delete(:identities) || []
self.normalizers = options_and_rules.delete(:normalizers) || options_and_rules.delete(:tighteners) || []
self.stop_words = options_and_rules.delete(:stop_words) || []
@read = options_and_rules.delete(:read) || options_and_rules.delete(:haystack_reader)
# options
# Carry the pre-rename option names over to the new keys. The old key is always removed;
# a false/nil value is simply dropped, which matches the false defaults in DEFAULT_OPTIONS.
+ if deprecated = options_and_rules.delete(:first_blocking_decides)
+ options_and_rules[:first_grouping_decides] = deprecated
+ end
+ if deprecated = options_and_rules.delete(:must_match_blocking)
+ options_and_rules[:must_match_grouping] = deprecated
+ end
# whatever is left are options: caller-supplied values win, DEFAULT_OPTIONS fills the gaps
@default_options = options_and_rules.reverse_merge(DEFAULT_OPTIONS).freeze
# do this last
# (haystack= hands self to every Wrapper; presumably the rules must already be set — confirm in Wrapper)
self.haystack = competitors
end
# Replaces the grouping rules: each regexp or string in +ary+ is wrapped in a grouping rule object.
# NOTE(review): the setter was renamed from blockings= and no alias is visible in this excerpt,
# so callers still using blockings= would break — confirm whether an alias exists elsewhere.
- def blockings=(ary)
- @blockings = ary.map { |regexp_or_str| Blocking.new regexp_or_str }
+ def groupings=(ary)
+ @groupings = ary.map { |regexp_or_str| Rule::Grouping.new regexp_or_str }
end
# Replaces the identity rules: each regexp or string in +ary+ is wrapped in an identity rule object.
def identities=(ary)
- @identities = ary.map { |regexp_or_str| Identity.new regexp_or_str }
+ @identities = ary.map { |regexp_or_str| Rule::Identity.new regexp_or_str }
end
# Replaces the normalizer rules: each regexp or string in +ary+ is wrapped in a normalizer rule object.
def normalizers=(ary)
- @normalizers = ary.map { |regexp_or_str| Normalizer.new regexp_or_str }
+ @normalizers = ary.map { |regexp_or_str| Rule::Normalizer.new regexp_or_str }
end
# Replaces the stop-word rules: each regexp or string in +ary+ is wrapped in a stop-word rule object.
def stop_words=(ary)
- @stop_words = ary.map { |regexp_or_str| StopWord.new regexp_or_str }
+ @stop_words = ary.map { |regexp_or_str| Rule::StopWord.new regexp_or_str }
end
# Replaces the haystack: every record in +ary+ is wrapped in a Wrapper that is handed this matcher.
def haystack=(ary)
  @haystack = ary.map do |record|
    Wrapper.new(self, record)
  end
end
@@ -122,12 +129,12 @@
# Looks for the haystack record that best matches +needle+.
#
# needle  - the value to look up; it is wrapped in a Wrapper before any comparison
# options - per-call overrides, merged over #default_options (same option keys as #initialize)
#
# Returns the winning record, or nil when no winner is assigned. With :find_all set it returns an
# array of records instead (an empty array on the early exits visible here).
#
# NOTE(review): this excerpt is a diff, so parts of the method between the "@@" hunk markers are not shown.
# NOTE(review): unlike #initialize, the deprecated :first_blocking_decides / :must_match_blocking keys are
# not translated here, so passing them to #find does not affect the values read below — confirm that is intended.
def find(needle, options = {})
options = options.symbolize_keys.reverse_merge default_options
gather_last_result = options[:gather_last_result]
is_find_all = options[:find_all]
- first_blocking_decides = options[:first_blocking_decides]
- must_match_blocking = options[:must_match_blocking]
+ first_grouping_decides = options[:first_grouping_decides]
+ must_match_grouping = options[:must_match_grouping]
must_match_at_least_one_word = options[:must_match_at_least_one_word]
# when requested, start a fresh Result that will record the rules used and a timeline of this search
if gather_last_result
@last_result = Result.new
last_result.read = read
@@ -140,11 +147,11 @@
end
if gather_last_result
last_result.normalizers = normalizers
last_result.identities = identities
- last_result.blockings = blockings
+ last_result.groupings = groupings
last_result.stop_words = stop_words
end
# NOTE(review): the third argument (true) presumably marks this wrapper as the needle — confirm in Wrapper
needle = Wrapper.new self, needle, true
@@ -154,15 +161,15 @@
The needle's #{needle.variants.length} variants were enumerated.
\tVariants: #{needle.variants.map(&:inspect).join(', ')}
EOS
end
# early exit: a grouping match is required, groupings exist, and the needle fits none of them
- if must_match_blocking and blockings.any? and blockings.none? { |blocking| blocking.match? needle }
+ if must_match_grouping and groupings.any? and groupings.none? { |grouping| grouping.match? needle }
if gather_last_result
last_result.timeline << <<-EOS
-The needle didn't match any of the #{blockings.length} blocking, which was a requirement.
-\tBlockings (first 3): #{blockings[0,3].map(&:inspect).join(', ')}
+The needle didn't match any of the #{groupings.length} grouping, which was a requirement.
+\tGroupings (first 3): #{groupings[0,3].map(&:inspect).join(', ')}
EOS
end
if is_find_all
return []
@@ -185,35 +192,35 @@
end
else
passed_word_requirement = haystack
end
# narrow the field to the straws that join the needle in a grouping (skipped when no groupings are defined)
- if blockings.any?
+ if groupings.any?
joint = passed_word_requirement.select do |straw|
# NOTE(review): the detect below depends only on the needle, yet it is recomputed for every straw.
# If no grouping matches the needle, detect returns nil and try(...) returns nil, rejecting every straw.
- if first_blocking_decides
- blockings.detect { |blocking| blocking.match? needle }.try :join?, needle, straw
+ if first_grouping_decides
+ groupings.detect { |grouping| grouping.match? needle }.try :join?, needle, straw
else
- blockings.any? { |blocking| blocking.join? needle, straw }
+ groupings.any? { |grouping| grouping.join? needle, straw }
end
end
if gather_last_result
last_result.timeline << <<-EOS
-Since there were blockings, the competition was reduced to records in the same block as the needle.
-\tBlockings (first 3): #{blockings[0,3].map(&:inspect).join(', ')}
+Since there were groupings, the competition was reduced to records in the same group as the needle.
+\tGroupings (first 3): #{groupings[0,3].map(&:inspect).join(', ')}
\tPassed (first 3): #{joint[0,3].map(&:render).map(&:inspect).join(', ')}
\tFailed (first 3): #{(passed_word_requirement-joint)[0,3].map(&:render).map(&:inspect).join(', ')}
EOS
end
else
joint = passed_word_requirement.dup
end
if joint.none?
# NOTE(review): the timeline message in this branch cites :must_match_at_least_one_word, but the
# condition tested is must_match_grouping — looks like a copy/paste slip; confirm before trusting the text.
- if must_match_blocking
+ if must_match_grouping
if gather_last_result
last_result.timeline << <<-EOS
-Since :must_match_at_least_one_word => true and none of the competition was in the same block as the needle, the search stopped.
+Since :must_match_at_least_one_word => true and none of the competition was in the same group as the needle, the search stopped.
EOS
end
if is_find_all
return []
else
@@ -254,23 +261,24 @@
# :find_all short-circuits winner selection and returns the record behind every remaining similarity
if is_find_all
return similarities.map { |similarity| similarity.wrapper2.record }
end
# The first similarity (presumably sorted best-first in lines not shown) wins if its Dice's coefficient
# is positive or, in the newer version, if it shares at least one word with the needle.
+ best_similarity = similarities.first
winner = nil
- if best_similarity = similarities.first and best_similarity.best_score.dices_coefficient_similar > 0
+ if best_similarity and (best_similarity.best_score.dices_coefficient_similar > 0 or (needle.words & best_similarity.wrapper2.words).any?)
winner = best_similarity.wrapper2.record
if gather_last_result
last_result.winner = winner
# NOTE(review): the recorded score is the Dice's coefficient, which can be 0 when the win came from a shared word
last_result.score = best_similarity.best_score.dices_coefficient_similar
last_result.timeline << <<-EOS
-A winner was determined because the similarity score #{best_similarity.best_score.dices_coefficient_similar} is greater than zero.
+A winner was determined because the Dice's Coefficient similarity (#{best_similarity.best_score.dices_coefficient_similar}) is greater than zero or because it shared a word with the needle.
EOS
end
# no winner: the try(...) chain in the newer message tolerates best_similarity being nil (no similarities at all)
elsif gather_last_result
last_result.timeline << <<-EOS
-No winner assigned because similarity score was zero.
+No winner assigned because the score of the best similarity (#{best_similarity.try(:wrapper2).try(:record).try(:inspect)}) was zero and it didn't match any words with the needle (#{needle.inspect}).
EOS
end
# nil unless a winner was chosen above
winner
end