Sha256: ae0de00e9289e7c64f1f2e06816df25e21ee55d6c98839d1b47618fde940ce42
Contents?: true
Size: 1.96 KB
Versions: 4
Compression:
Stored size: 1.96 KB
Contents
class Licensee
  module Matchers
    # Matches a file against the known licenses by scoring the overlap of
    # their word sets with the Sorensen-Dice coefficient, expressed as a
    # percentage (0.0..100.0).
    class Dice
      # file - an object exposing #wordset; the wordset must respond to
      #        #size and #& (presumably a Set of words - confirm with caller).
      def initialize(file)
        @file = file
      end

      # Returns the potential license with the highest similarity among those
      # scoring at least Licensee.confidence_threshold, or nil when none
      # qualifies. The result (including nil) is memoized.
      def match
        return @match if defined? @match

        scored = potential_licenses.map { |license| [license, similarity(license)] }
        qualifying = scored.select { |_, score| score >= Licensee.confidence_threshold }

        # max_by returns nil for an empty list, which doubles as "no match".
        best = qualifying.max_by { |_, score| score }
        @match = best && best.first
      end

      # Returns the candidate licenses (hidden ones included), sorted in
      # ascending order of their length difference to the file.
      #
      # Licenses without a wordset are skipped, as are licenses whose length
      # difference exceeds max_delta. The list is memoized.
      def potential_licenses
        @potential_licenses ||= begin
          candidates = Licensee.licenses(:hidden => true).select do |license|
            license.wordset && length_delta(license) <= max_delta
          end
          candidates.sort_by { |license| length_delta(license) }
        end
      end

      # Returns the absolute difference between the file's word count and
      # the given license's word count.
      def length_delta(license)
        (@file.wordset.size - license.wordset.size).abs
      end

      # Returns the largest length difference a license may have and still
      # be considered a potential match: the file's word count multiplied by
      # confidence_threshold / 100. Memoized.
      #
      # NOTE(review): this scales by the threshold itself rather than by
      # (100 - threshold), so the pre-filter is very permissive for high
      # thresholds - confirm that this is intended.
      def max_delta
        @max_delta ||= @file.wordset.size * (Licensee.confidence_threshold / 100.0)
      end

      # Returns the similarity score of the matched license, or 0 when no
      # license matched. Memoized.
      def confidence
        @confidence ||= match ? similarity(match) : 0
      end

      private

      # Returns the percent similarity between the file and the given
      # license: 100 * 2 * |overlap| / (|file words| + |license words|).
      def similarity(license)
        overlap = (@file.wordset & license.wordset).size
        total = @file.wordset.size + license.wordset.size

        # Two empty word sets would otherwise divide by zero and yield NaN.
        return 0.0 if total.zero?

        100.0 * (overlap * 2.0 / total)
      end
    end
  end
end
Version data entries
4 entries across 4 versions & 1 rubygems