Sha256: fbe8a2a4923e80eb1c266be00df6a74cc01c22ba709ce66c63d9b31d7df3c8eb
Contents?: true
Size: 1.56 KB
Versions: 4
Compression:
Stored size: 1.56 KB
Contents
require 'set'
require 'digest'

module Licensee
  # Mixin with text-comparison helpers for license-like documents.
  #
  # The including object must respond to `content`, returning the raw text
  # as a String, or nil when no text is available. Every helper below is
  # derived from the normalized form of that text (see #content_normalized).
  module ContentHelper
    # Digest implementation used by #hash.
    DIGEST = Digest::SHA1

    # A set of each word in the normalized content, without duplicates.
    # A "word" is a run of word characters and apostrophes.
    #
    # Returns a Set of Strings, or nil when there is no content.
    def wordset
      @wordset ||= content_normalized && content_normalized.scan(/[\w']+/).to_set
    end

    # Number of characters in the normalized content.
    #
    # Returns an Integer; 0 when there is no content.
    def length
      normalized = content_normalized
      normalized ? normalized.length : 0
    end

    # Number of characters that could be added/removed to still be
    # considered a potential match. Scales #length by
    # Licensee.inverse_confidence_threshold and truncates to an Integer.
    def max_delta
      (length * Licensee.inverse_confidence_threshold).to_i
    end

    # Given another license or project file, calculates the absolute
    # difference in normalized length.
    #
    # other - any object responding to `length`.
    def length_delta(other)
      (length - other.length).abs
    end

    # Given another license or project file, calculates the similarity
    # as a percentage of words in common:
    #
    #   100 * (2 * |A & B|) / (|A| + |B|)
    #
    # other - any object responding to `wordset`.
    #
    # Returns a Float between 0.0 and 100.0. Returns 0.0 when either side
    # has no content, or when both word sets are empty; the plain formula
    # would raise on a nil word set and yield NaN for 0.0 / 0.
    def similarity(other)
      own_words = wordset
      other_words = other.wordset
      return 0.0 unless own_words && other_words

      total = own_words.size + other_words.size
      return 0.0 if total.zero?

      overlap = (own_words & other_words).size
      100.0 * (overlap * 2.0 / total)
    end

    # SHA1 hex digest of the normalized content.
    #
    # NOTE: this intentionally keeps the existing public name, which
    # overrides Object#hash with a String-returning method.
    #
    # Returns a String, or nil when there is no content (hashing nil
    # would otherwise raise a TypeError).
    def hash
      return unless content_normalized

      @hash ||= DIGEST.hexdigest content_normalized
    end

    # Content with copyright header and linebreaks removed.
    #
    # Steps: downcase and strip, blank out any line matching
    # Matchers::Copyright::REGEX, remove runs of four or more '=' or '-'
    # (horizontal rules, e.g. in the MPL), then turn newlines into spaces
    # and collapse repeated spaces.
    #
    # Returns a String, or nil when `content` is nil/false.
    def content_normalized
      return unless content

      @content_normalized ||= begin
        normalized = content.downcase.strip
        # gsub! returns nil when nothing matched, so the bang calls are
        # deliberately kept as separate statements instead of chained.
        normalized.gsub!(/^#{Matchers::Copyright::REGEX}$/i, '')
        normalized.gsub!(/[=-]{4,}/, '') # Strip HRs from MPL
        normalized.tr("\n", ' ').squeeze(' ').strip
      end
    end
  end
end
Version data entries
4 entries across 4 versions & 1 rubygems
Version | Path |
---|---|
licensee-8.7.0 | lib/licensee/content_helper.rb |
licensee-8.6.1 | lib/licensee/content_helper.rb |
licensee-8.6.0 | lib/licensee/content_helper.rb |
licensee-8.5.0 | lib/licensee/content_helper.rb |