lib/eco/data/fuzzy_match.rb in eco-helpers-2.4.9 vs lib/eco/data/fuzzy_match.rb in eco-helpers-2.5.1
- old
+ new
@@ -1,9 +1,5 @@
-require 'fuzzy_match'
-require 'amatch'
-require 'jaro_winkler'
-
require_relative 'fuzzy_match/stop_words'
require_relative 'fuzzy_match/array_helpers'
require_relative 'fuzzy_match/string_helpers'
require_relative 'fuzzy_match/pairing'
require_relative 'fuzzy_match/chars_position_score'
@@ -31,10 +27,12 @@
return 0 if !str1 || !str2
options = {
ignore_case: true,
weight: 0.25
}.merge(options)
+
+ require 'jaro_winkler'
JaroWinkler.distance(str1, str2, **options)
end
end
@@ -61,11 +59,14 @@
# Returns the `::FuzzyMatch` matcher built over the haystack, caching it in `@fuzzy_match`.
# The cached matcher is reused only when no `haystack_data` is given and
# `fuzzy_match_options` equals `fuzzy_match_options(options)`; otherwise a new one is built.
# @param haystack_data [Object, nil] forwarded to `haystack`; when given, the cache is bypassed.
# @param options [Hash] stored in `@fuzzy_options` (presumably read by `fuzzy_match_options` -- confirm).
# @return [::FuzzyMatch] the cached or the newly built matcher.
def fuzzy_match(haystack_data = nil, **options)
if instance_variable_defined?(:@fuzzy_match) && !haystack_data
# Reuse the cached matcher when the given options resolve to the current ones.
return @fuzzy_match if fuzzy_match_options == fuzzy_match_options(options)
end
@fuzzy_options = options
+
# Use the amatch engine, a native C extension, for better performance (~130 % increase).
+ require 'fuzzy_match'
+ require 'amatch'
::FuzzyMatch.engine = :amatch
@fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
end
# TODO: integrate options[:unique_words] => ensure that repeated words do not bring the score down (and get results cut by the threshold)
@@ -113,10 +114,13 @@
unless istr = needle_str
dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
end
end
+ require 'fuzzy_match'
+ require 'amatch'
res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
+
dice ||= res&.dices_coefficient_similar || 0
lev ||= res&.levenshtein_similar || 0
jaro_res ||= jaro(nstr, istr)
ngram_res ||= ngram(nstr, istr)
wngram_res ||= words_ngram(nstr, istr)