lib/spandx/content.rb in spandx-0.4.1 vs lib/spandx/content.rb in spandx-0.5.0
- old
+ new
@@ -1,34 +1,41 @@
# frozen_string_literal: true
module Spandx
class Content
# Public readers. The old (-) side exposed both `raw` and `threshold`;
# the new (+) side exposes only `raw`.
- attr_reader :raw, :threshold
+ attr_reader :raw
# Builds a Content wrapper around `raw`, which is stored in @raw.
#
# Old (-) side: also accepted a `threshold:` keyword (default 89.0) and
# stored it in @threshold.
# New (+) side: takes `raw` only.
#
# NOTE(review): with the new signature, a caller still passing `threshold:`
# would get an ArgumentError for the unknown keyword - confirm no such
# callers remain.
- def initialize(raw, threshold: 89.0)
- @threshold = threshold
+ def initialize(raw)
@raw = raw
end
# Returns the token set for this content, computing it on first use.
#
# `raw` is passed through `canonicalize`, the result through `tokenize`,
# and the outcome is converted with `to_set`. The set is cached in @tokens
# and handed back unchanged on later calls.
def tokens
  return @tokens if @tokens

  canonical_text = canonicalize(raw)
  @tokens = tokenize(canonical_text).to_set
end
# Decides whether `other` counts as similar to this content under the
# chosen `algorithm` (default :dice_coefficient).
#
# Old (-) side: compared `similarity_score(other)` against the instance's
# `threshold`. The call did not forward `algorithm`, so it used
# similarity_score's own default even in the :levenshtein arm.
# New (+) side: forwards `algorithm` and uses fixed cut-offs:
#   :dice_coefficient -> score > 89.0
#   :levenshtein      -> score < 3
#   :jaro_winkler     -> score > 89.0 (arm added on the new side)
#
# There is no `else` arm, so an algorithm matching none of the `when`
# clauses makes the `case` (and therefore this method) evaluate to nil.
def similar?(other, algorithm: :dice_coefficient)
case algorithm
when :dice_coefficient
- similarity_score(other) > threshold
+ similarity_score(other, algorithm: algorithm) > 89.0
when :levenshtein
# Levenshtein is a distance, hence the `<` comparison in this arm.
- similarity_score(other) < threshold
+ similarity_score(other, algorithm: algorithm) < 3
+ when :jaro_winkler
+ similarity_score(other, algorithm: algorithm) > 89.0
end
end
# Computes a numeric score comparing this content with `other` using the
# chosen `algorithm` (default :dice_coefficient).
#
#   :dice_coefficient -> result of `dice_coefficient(other)` (defined
#                        outside the visible part of this file)
#   :levenshtein      -> Text::Levenshtein.distance(raw, other.raw, 100)
#   :jaro_winkler     -> JaroWinkler.distance(raw, other.raw) * 100.0
#                        (arm added on the new (+) side)
#
# The new (+) side requires 'text' and 'jaro_winkler' inside the arm that
# uses them, so each library is only loaded when that algorithm is used.
#
# There is no `else` arm, so an unrecognized algorithm yields nil.
def similarity_score(other, algorithm: :dice_coefficient)
case algorithm
when :dice_coefficient
dice_coefficient(other)
when :levenshtein
+ require 'text'
+
# NOTE(review): the third argument (100) presumably caps the computed
# distance - confirm against the text gem's documentation.
Text::Levenshtein.distance(raw, other.raw, 100)
+ when :jaro_winkler
+ require 'jaro_winkler'
+
# NOTE(review): multiplying by 100.0 presumably rescales a 0..1 result
# to a percentage - confirm against the jaro_winkler gem.
+ JaroWinkler.distance(raw, other.raw) * 100.0
end
end
private