Sha256: f2cdba38deb97377197da187ce8a12eb99419a8de5598085184437648efa715c
Contents?: true
Size: 815 Bytes
Versions: 2
Compression:
Stored size: 815 Bytes
Contents
# frozen_string_literal: true

# `String#scan(...).to_set` needs Set; it is only autoloaded on Ruby >= 3.2.
require 'set'

module Spandx
  # Wraps a piece of text as a set of lowercase alphabetic word tokens and
  # compares it with another Content using Dice's coefficient.
  class Content
    # tokens    - Set of unique lowercase word tokens extracted from the text.
    # threshold - score (0..100 scale) that must be exceeded to be "similar".
    attr_reader :tokens, :threshold

    # content   - String (or nil) holding the text to tokenize.
    # threshold - minimum similarity percentage; defaults to 89.0.
    def initialize(content, threshold: 89.0)
      @threshold = threshold
      @tokens = tokenize(canonicalize(content)).to_set
    end

    # Returns true when the similarity score against `other` is strictly
    # greater than this instance's threshold.
    def similar?(other)
      similarity_score(other) > threshold
    end

    # Computes Dice's coefficient between the two token sets, scaled to a
    # percentage: 100 * (2 * |A & B|) / (|A| + |B|).
    # https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
    #
    # other - any object responding to #tokens with a Set.
    #
    # Returns a Float between 0.0 and 100.0. When neither side has any
    # tokens the score is 0.0 rather than NaN (0.0 / 0).
    def similarity_score(other)
      overlap = (tokens & other.tokens).size
      total = tokens.size + other.tokens.size
      # Guard the 0/0 case: two empty token sets carry no evidence of similarity.
      return 0.0 if total.zero?

      100.0 * (overlap * 2.0 / total)
    end

    private

    # Lowercases the text so comparison is case-insensitive; nil stays nil.
    def canonicalize(content)
      content&.downcase
    end

    # Splits text into runs of ASCII letters; digits, punctuation and
    # whitespace act as separators. nil yields an empty Array.
    def tokenize(content)
      content.to_s.scan(/[a-zA-Z]+/)
    end

    # True when content is nil or contains only whitespace.
    def blank?(content)
      content.nil? || content.chomp.strip.empty?
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
spandx-0.1.7 | lib/spandx/content.rb |
spandx-0.1.6 | lib/spandx/content.rb |