lib/licensee/project_files/license_file.rb in licensee-9.14.1 vs lib/licensee/project_files/license_file.rb in licensee-9.15.0

- old
+ new

@@ -8,12 +8,19 @@ # List of extensions to give preference to PREFERRED_EXT = %w[md markdown txt html].freeze PREFERRED_EXT_REGEX = /\.#{Regexp.union(PREFERRED_EXT)}\z/.freeze # Regex to match any extension except .spdx or .header - OTHER_EXT_REGEX = %r{\.(?!spdx|header|gemspec)[^./]+\z}i.freeze + LICENSE_EXT_REGEX = %r{\.(?!spdx|header)[^./]+\z}i.freeze + # Regex to match any extension except a few unlikely as license + # texts with complex filenames + OTHER_EXT_REGEX = %r{\.(?!xml|go|gemspec)[^./]+\z}i.freeze + + # Regex to match any extension + ANY_EXT_REGEX = %r{\.[^./]+\z}i.freeze + # Regex to match, LICENSE, LICENCE, unlicense, etc. LICENSE_REGEX = /(un)?licen[sc]e/i.freeze # Regex to match COPYING, COPYRIGHT, etc. COPYING_REGEX = /copy(ing|right)/i.freeze @@ -24,26 +31,26 @@ # BSD + PATENTS patent file PATENTS_REGEX = /patents/i.freeze # Hash of Regex => score with which to score potential license files FILENAME_REGEXES = { - /\A#{LICENSE_REGEX}\z/ => 1.00, # LICENSE - /\A#{LICENSE_REGEX}#{PREFERRED_EXT_REGEX}\z/ => 0.95, # LICENSE.md - /\A#{COPYING_REGEX}\z/ => 0.90, # COPYING - /\A#{COPYING_REGEX}#{PREFERRED_EXT_REGEX}\z/ => 0.85, # COPYING.md - /\A#{LICENSE_REGEX}#{OTHER_EXT_REGEX}\z/ => 0.80, # LICENSE.textile - /\A#{COPYING_REGEX}#{OTHER_EXT_REGEX}\z/ => 0.75, # COPYING.textile - /\A#{LICENSE_REGEX}[-_]/ => 0.70, # LICENSE-MIT - /\A#{COPYING_REGEX}[-_]/ => 0.65, # COPYING-MIT - /\A\w+[-_]#{LICENSE_REGEX}/ => 0.60, # MIT-LICENSE-MIT - /\A\w+[-_]#{COPYING_REGEX}/ => 0.55, # MIT-COPYING - /\A#{OFL_REGEX}#{PREFERRED_EXT_REGEX}/ => 0.50, # OFL.md - /\A#{OFL_REGEX}#{OTHER_EXT_REGEX}/ => 0.45, # OFL.textile - /\A#{OFL_REGEX}\z/ => 0.40, # OFL - /\A#{PATENTS_REGEX}\z/ => 0.35, # PATENTS - /\A#{PATENTS_REGEX}#{OTHER_EXT_REGEX}\z/ => 0.30, # PATENTS.txt - // => 0.00 # Catch all + /\A#{LICENSE_REGEX}\z/ => 1.00, # LICENSE + /\A#{LICENSE_REGEX}#{PREFERRED_EXT_REGEX}\z/ => 0.95, # LICENSE.md + /\A#{COPYING_REGEX}\z/ => 0.90, # COPYING + /\A#{COPYING_REGEX}#{PREFERRED_EXT_REGEX}\z/ => 0.85, # COPYING.md + /\A#{LICENSE_REGEX}#{LICENSE_EXT_REGEX}\z/ => 0.80, # LICENSE.textile + /\A#{COPYING_REGEX}#{ANY_EXT_REGEX}\z/ => 0.75, # COPYING.textile + /\A#{LICENSE_REGEX}[-_][^.]*#{OTHER_EXT_REGEX}?\z/ => 0.70, # LICENSE-MIT + /\A#{COPYING_REGEX}[-_][^.]*#{OTHER_EXT_REGEX}?\z/ => 0.65, # COPYING-MIT + /\A\w+[-_]#{LICENSE_REGEX}[^.]*#{OTHER_EXT_REGEX}?\z/ => 0.60, # MIT-LICENSE-MIT + /\A\w+[-_]#{COPYING_REGEX}[^.]*#{OTHER_EXT_REGEX}?\z/ => 0.55, # MIT-COPYING + /\A#{OFL_REGEX}#{PREFERRED_EXT_REGEX}/ => 0.50, # OFL.md + /\A#{OFL_REGEX}#{OTHER_EXT_REGEX}/ => 0.45, # OFL.textile + /\A#{OFL_REGEX}\z/ => 0.40, # OFL + /\A#{PATENTS_REGEX}\z/ => 0.35, # PATENTS + /\A#{PATENTS_REGEX}#{OTHER_EXT_REGEX}\z/ => 0.30, # PATENTS.txt + // => 0.00 # Catch all }.freeze # CC-NC and CC-ND are not open source licenses and should not be # detected as CC-BY or CC-BY-SA which are 98%+ similar CC_FALSE_POSITIVE_REGEX = / @@ -54,14 +61,14 @@ [Matchers::Copyright, Matchers::Exact, Matchers::Dice] end def attribution @attribution ||= begin - return unless copyright? || license.content&.include?('[fullname]') - - matches = Matchers::Copyright::REGEX - .match(content_without_title_and_version) - matches[0] if matches + if copyright? || license.content&.include?('[fullname]') + matches = Matchers::Copyright::REGEX + .match(content_without_title_and_version) + matches[0] if matches + end end end # Is this file likely to result in a creative commons false positive? def potential_false_positive?