lib/spandx/content.rb in spandx-0.4.1 vs lib/spandx/content.rb in spandx-0.5.0

- old
+ new

@@ -1,34 +1,41 @@ # frozen_string_literal: true module Spandx class Content - attr_reader :raw, :threshold + attr_reader :raw - def initialize(raw, threshold: 89.0) - @threshold = threshold + def initialize(raw) @raw = raw end def tokens @tokens ||= tokenize(canonicalize(raw)).to_set end def similar?(other, algorithm: :dice_coefficient) case algorithm when :dice_coefficient - similarity_score(other) > threshold + similarity_score(other, algorithm: algorithm) > 89.0 when :levenshtein - similarity_score(other) < threshold + similarity_score(other, algorithm: algorithm) < 3 + when :jaro_winkler + similarity_score(other, algorithm: algorithm) > 89.0 end end def similarity_score(other, algorithm: :dice_coefficient) case algorithm when :dice_coefficient dice_coefficient(other) when :levenshtein + require 'text' + Text::Levenshtein.distance(raw, other.raw, 100) + when :jaro_winkler + require 'jaro_winkler' + + JaroWinkler.distance(raw, other.raw) * 100.0 end end private