# frozen_string_literal: true

# Released under the MIT License.
# Copyright, 2022-2024, by Samuel Williams.

require 'date'
require 'set'
require 'time'
require 'yaml'

require 'rugged'

module Bake
  module Modernize
    # Support the analysis of authorship and license details.
    module License
      GIT_BLAME_IGNORE_REVS = ".git-blame-ignore-revs"

      # Represents revisions to skip when analyzing authorship.
      class SkipList
        # Load the skip list from a directory.
        #
        # @parameter root [String] The directory which may contain the ignore-revs file.
        # @returns [SkipList | Nil] The loaded skip list, or `nil` if the file does not exist.
        def self.for(root)
          full_path = File.join(root, GIT_BLAME_IGNORE_REVS)
          return unless File.exist?(full_path)

          skip_list = self.new
          skip_list.extract(full_path)

          skip_list
        end

        # Create a new skip list with the given revisions.
        #
        # @parameter revisions [Array(String)] The revisions to skip.
        def initialize(revisions = [])
          @revisions = Set.new(revisions)
        end

        # Extract the revisions from the given path.
        #
        # Each remaining line (after removing blank lines and comments) is stored, stripped, as a revision.
        #
        # @parameter path [String] The path of the file to read.
        def extract(path)
          File.open(path, 'r') do |file|
            file.each_line do |line|
              # Skip empty lines and comments:
              next if line.match?(/^\s*(#|$)/)

              @revisions << line.strip
            end
          end
        end

        # Check if the given commit should be ignored.
        #
        # @parameter commit [Object] Anything responding to `oid`.
        # @returns [Boolean] Whether the commit's `oid` is one of the stored revisions.
        def ignore?(commit)
          @revisions.include?(commit.oid)
        end
      end

      # Represents a mailmap file which maps commit emails to proper names.
      class Mailmap
        # Load the mailmap from a directory.
        #
        # @parameter root [String] The directory which may contain a `.mailmap` file.
        # @returns [Mailmap | Nil] The loaded mailmap, or `nil` if the file does not exist.
        def self.for(root)
          full_path = File.join(root, '.mailmap')
          return unless File.exist?(full_path)

          mailmap = self.new
          mailmap.extract(full_path)

          mailmap
        end

        # Create a new, empty, mailmap.
        def initialize
          @names = {}
        end

        # @attribute [Hash(String, String)] The mapping of commit emails to proper names.
        attr :names

        # Extract the mailmap from the given path.
        #
        # Comments, blank lines and lines which do not match {PATTERN} are skipped. Only entries which provide both a proper name and a commit email are recorded.
        #
        # @parameter path [String] The path of the mailmap file to read.
        def extract(path)
          File.open(path, 'r') do |file|
            file.each_line do |line|
              # Skip comments and empty lines:
              next if line.start_with?('#') || line.match?(/^\s*$/)

              # A line which does not match the mailmap format yields no match data, so skip it rather than failing:
              next unless (user = extract_from_line(line))

              commit_email = user[:commit_email]
              # The name capture is greedy and may retain trailing whitespace before the `<`:
              proper_name = user[:proper_name]&.strip

              @names[commit_email] = proper_name if commit_email && proper_name
            end
          end
        end

        # Format: Proper Name <proper@email.xx> Commit Name <commit@email.xx>
        #
        # Every part except the final `<commit@email.xx>` is optional.
        PATTERN = /
          (?<proper_name>[^<]+)?
          (\s+<(?<proper_email>[^>]+)>)?
          (\s+(?<commit_name>[^<]+)?)?
          \s+<(?<commit_email>[^>]+)>
        /x

        # Extract the mailmap format from a line of input.
        #
        # @parameter line [String] A single line of a mailmap file.
        # @returns [MatchData | Nil] The named captures of {PATTERN}, or `nil` if the line does not match.
        def extract_from_line(line)
          line.match(PATTERN)
        end
      end

      # Extract contributors from a YAML file which can be generated from another repository.
      class Contributors
        # The default path is the root of the repository and for authors who have contributed to the entire repository or unspecified paths in the past.
        DEFAULT_PATH = '.'

        # Load contributors from a directory.
        #
        # @parameter root [String] The directory which may contain a `.contributors.yaml` file.
        # @returns [Contributors | Nil] The loaded contributors, or `nil` if the file does not exist.
        def self.for(root)
          full_path = File.join(root, '.contributors.yaml')
          return unless File.exist?(full_path)

          contributors = self.new
          contributors.extract(full_path)

          contributors
        end

        # Create a new, empty, contributors list.
        def initialize
          @contributions = []
        end

        # Iterate over each contribution.
        #
        # @yields {|path, author, time| ...} Once per path of every contribution.
        # @returns [Enumerator] If no block is given.
        def each(&block)
          return to_enum unless block_given?

          @contributions.each do |contribution|
            author = contribution[:author]
            time = contribution[:time]

            paths_for(contribution) do |path|
              yield path, author, time
            end
          end
        end

        # Extract the contributors from the given path.
        #
        # @parameter path [String] The path of the YAML file to load.
        def extract(path)
          loaded = YAML.load_file(path, aliases: true, symbolize_names: true, permitted_classes: [Symbol, Date, Time])

          # An empty document loads as `nil` (or `false`), which means there is nothing to add:
          @contributions.concat(loaded || [])
        end

        # Enumerate the paths of a given contribution, falling back to {DEFAULT_PATH} when none is specified.
        #
        # @parameter contribution [Hash] The contribution, optionally containing a `:path` key.
        # @yields {|path| ...} Each path of the contribution.
        # @returns [Enumerator] If no block is given.
        def paths_for(contribution)
          return to_enum(:paths_for, contribution) unless block_given?

          if path = contribution[:path]
            yield path
          # elsif paths = contribution[:paths]
          # 	paths.each do |path|
          # 		yield path
          # 	end
          else
            yield DEFAULT_PATH
          end
        end
      end

      # Represents the authorship of a repository.
      class Authorship
        # Represents a modification to a file.
        Modification = Struct.new(:author, :time, :path, :id) do
          # The name of the author of this modification.
          def full_name
            author[:name]
          end

          # A key which identifies the change this modification belongs to: the explicit id if there is one, otherwise the author email combined with the timestamp.
          def key
            id || "#{author[:email]}:#{time.iso8601}"
          end

          # Convert the modification into a hash.
          def to_h
            {
              id: id,
              time: time,
              path: path,
              author: author,
            }
          end
        end

        # Represents the copyright for an author.
        Copyright = Struct.new(:dates, :author) do
          # Order copyrights by their dates, then by author.
          def <=>(other)
            to_a <=> other.to_a
          end

          # The copyright statement, e.g. "Copyright, 2022-2024, by Name.". A single year is shown when all dates fall within the same year.
          def statement
            years = dates.map(&:year).uniq

            "Copyright, #{years.join('-')}, by #{author}."
          end
        end

        # Create a new, empty, authorship.
        def initialize
          @paths = Hash.new { |hash, key| hash[key] = [] }
          @commits = Hash.new { |hash, key| hash[key] = [] }
        end

        # @attribute [Hash(String, Array(Modification))] The mapping of paths to modifications.
        attr :paths

        # @attribute [Hash(String, Array(Modification))] The mapping of commits to modifications.
        attr :commits

        # Add a modification to the authorship.
        #
        # @parameter path [String] The path which was modified.
        # @parameter author [Hash] The author details, containing `:name` and `:email`.
        # @parameter time [Time] When the modification was made.
        # @parameter id [String | Nil] The identity of the commit, if known.
        def add(path, author, time, id = nil)
          modification = Modification.new(author, time, path, id)

          @commits[modification.key] << modification
          @paths[path] << modification
        end

        # Extract the authorship from the given root directory.
        #
        # Contributions listed in `.contributors.yaml` (if present) are added first, followed by the history of the repository discovered from the root.
        #
        # @parameter root [String] The directory to analyze.
        # @returns [Authorship] This instance.
        def extract(root = Dir.pwd)
          mailmap = Mailmap.for(root)
          skip_list = SkipList.for(root)

          if contributors = Contributors.for(root)
            contributors.each do |path, author, time|
              add(path, author, time)
            end
          end

          walk(Rugged::Repository.discover(root), mailmap: mailmap, skip_list: skip_list)

          self
        end

        # Authors, sorted by the number of commits they contributed to (most first), then by name.
        #
        # @returns [Array(String)] The author names.
        def sorted_authors
          counts = Hash.new(0)

          @commits.each_value do |modifications|
            # Count each author at most once per commit:
            modifications.map(&:full_name).uniq.each do |full_name|
              counts[full_name] += 1
            end
          end

          counts.sort_by { |name, count| [-count, name] }.map(&:first)
        end

        # All copyrights.
        def copyrights
          copyrights_for_modifications(@paths.values.flatten)
        end

        # All copyrights for a given path.
        def copyrights_for_path(path)
          copyrights_for_modifications(@paths[path])
        end

        # All copyrights for the given modifications.
        #
        # @parameter modifications [Array(Modification)] The modifications to summarize.
        # @returns [Array(Copyright)] One copyright per author, spanning their earliest and latest modification, in sorted order.
        def copyrights_for_modifications(modifications)
          by_author = modifications.group_by(&:full_name)

          by_author.map do |name, authored|
            Copyright.new(authored.map(&:time).minmax, name)
          end.sort
        end

        private

        DEFAULT_SORT = Rugged::SORT_DATE | Rugged::SORT_TOPO | Rugged::SORT_REVERSE

        # Walk the history of the repository, recording a modification for every file touched by every commit.
        #
        # @parameter repository [Rugged::Repository] The repository to walk.
        # @parameter mailmap [Mailmap | Nil] Used to replace author names by commit email, if given.
        # @parameter skip_list [SkipList | Nil] Commits ignored by this list are not recorded, if given.
        # @parameter show [String] The revision to start walking from.
        def walk(repository, mailmap: nil, skip_list: nil, show: "HEAD")
          Rugged::Walker.walk(repository, show: show, sort: DEFAULT_SORT) do |commit|
            next if skip_list&.ignore?(commit)

            diff = commit.diff

            # We relax the threshold for copy and rename detection because we want to detect files that have been moved and modified more generously.
            diff.find_similar!(
              rename_threshold: 25,
              copy_threshold: 25,
              ignore_whitespace: true,
            )

            diff.each_delta do |delta|
              old_path = delta.old_file[:path]
              new_path = delta.new_file[:path]

              if old_path != new_path
                # The file was moved, move copyright information too:
                Console.logger.debug(self, "Moving #{old_path} to #{new_path}", similarity: delta.similarity)
                # The default block of @paths creates missing entries on access.
                @paths[new_path].concat(@paths[old_path])
              end

              author = commit.author

              if mailmap
                if name = mailmap.names[author[:email]]
                  author[:name] = name
                end
              end

              add(new_path, author, commit.time, commit.oid)
            end
          end
        end
      end
    end
  end
end