Sha256: c26b29d5c357d377a09722776ee2e8e82135141c03dafd827934d8979c711ff6

Contents?: true

Size: 1.52 KB

Versions: 11

Compression:

Stored size: 1.52 KB

Contents

# frozen_string_literal: true

module Spandx
  module Core
    # Wraps a piece of raw text and compares it against another Content
    # using one of several similarity algorithms.
    class Content
      # Exclusive lower bound for the percentage-style scores
      # (:dice_coefficient and :jaro_winkler) to count as similar.
      PERCENT_SIMILARITY_THRESHOLD = 89.0

      # Exclusive upper bound for the :levenshtein score to count as similar.
      LEVENSHTEIN_SIMILARITY_THRESHOLD = 3

      attr_reader :raw

      # @param raw [String, nil] the unprocessed text
      def initialize(raw)
        @raw = raw
      end

      # The set of distinct, lower-cased alphabetic words found in +raw+.
      # Memoized after the first call.
      #
      # @return [Set<String>]
      def tokens
        @tokens ||= tokenize(canonicalize(raw)).to_set
      end

      # Decides whether +other+ is close enough to this content.
      #
      # @param other [Content] the content to compare against
      # @param algorithm [Symbol] :dice_coefficient, :levenshtein or :jaro_winkler
      # @return [Boolean, nil] nil when +algorithm+ is not recognised
      def similar?(other, algorithm: :dice_coefficient)
        score = similarity_score(other, algorithm: algorithm)

        case algorithm
        when :dice_coefficient, :jaro_winkler
          score > PERCENT_SIMILARITY_THRESHOLD
        when :levenshtein
          # This score is a distance, so smaller means closer.
          score < LEVENSHTEIN_SIMILARITY_THRESHOLD
        end
      end

      # Computes the raw score for the chosen algorithm. The scale differs
      # per algorithm: :dice_coefficient is a percentage computed over
      # +tokens+, :jaro_winkler is the library result multiplied by 100, and
      # :levenshtein is whatever Text::Levenshtein.distance returns.
      #
      # @param other [Content] the content to compare against
      # @param algorithm [Symbol] :dice_coefficient, :levenshtein or :jaro_winkler
      # @return [Numeric, nil] nil when +algorithm+ is not recognised
      def similarity_score(other, algorithm: :dice_coefficient)
        case algorithm
        when :dice_coefficient
          dice_coefficient(other)
        when :levenshtein
          # Loaded lazily so the gem is only needed when this algorithm is used.
          require 'text'

          # NOTE(review): the third argument is presumably a max-distance
          # cut-off - confirm against the text gem documentation.
          Text::Levenshtein.distance(raw, other.raw, 100)
        when :jaro_winkler
          # Loaded lazily so the gem is only needed when this algorithm is used.
          require 'jaro_winkler'

          JaroWinkler.distance(raw, other.raw) * 100.0
        end
      end

      private

      # Lower-cases the content; nil passes through untouched.
      def canonicalize(content)
        content&.downcase
      end

      # Splits the content into runs of ASCII letters; nil yields [].
      def tokenize(content)
        content.to_s.scan(/[a-zA-Z]+/)
      end

      # True when the content is nil or contains only whitespace.
      def blank?(content)
        content.nil? || content.chomp.strip.empty?
      end

      # Dice coefficient over the two token sets, scaled to a percentage.
      #
      # https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
      #
      # @param other [Content]
      # @return [Float] between 0.0 and 100.0
      def dice_coefficient(other)
        overlap = (tokens & other.tokens).size
        total = tokens.size + other.tokens.size
        # With no tokens on either side the ratio is 0/0, which would be NaN;
        # report "no measurable similarity" instead.
        return 0.0 if total.zero?

        100.0 * (overlap * 2.0 / total)
      end
    end
  end
end

Version data entries

11 entries across 11 versions & 1 rubygems

Version Path
spandx-0.12.3 lib/spandx/core/content.rb
spandx-0.12.2 lib/spandx/core/content.rb
spandx-0.12.1 lib/spandx/core/content.rb
spandx-0.12.0 lib/spandx/core/content.rb
spandx-0.11.0 lib/spandx/core/content.rb
spandx-0.10.1 lib/spandx/core/content.rb
spandx-0.10.0 lib/spandx/core/content.rb
spandx-0.9.0 lib/spandx/core/content.rb
spandx-0.8.0 lib/spandx/core/content.rb
spandx-0.7.0 lib/spandx/core/content.rb
spandx-0.6.0 lib/spandx/core/content.rb