#!/usr/bin/env ruby
# frozen_string_literal: true

# Reports, for one or more consecutive time windows, which teams touched the
# files under a path (teams are extracted from commit subjects with a regex)
# and which CODEOWNERS entries own the most frequently touched files.

require 'date'
require 'optparse'
require 'pry' # NOTE(review): nothing in this file references pry — confirm it is still needed.

# Defaults advertised in the option descriptions below.
DEFAULT_DURATION_IN_DAYS = 30
DEFAULT_STEPS = 1

options = {}
OptionParser.new do |opts|
  opts.banner = 'Usage: git_ownership_insights [options]'

  opts.on('--debug', 'Enable debug mode') do
    options[:debug] = true
  end

  opts.on('--exclusions STRING',
          'Comma-delimited list of exclusions [example: WEB,RAILS,MOBILE]') do |exclusions|
    options[:exclusions] = exclusions
  end

  opts.on('--steps STRING', 'Number of steps the script will go into the past [default: 1]') do |steps|
    options[:steps] = steps
  end

  opts.on('--duration_in_days STRING',
          'Number of days to aggregate the changes for [default: 30]') do |duration_in_days|
    options[:duration_in_days] = duration_in_days
  end

  opts.on('--path STRING',
          'Path to the directory or file to calculate the ownership [default: "."]') do |path|
    options[:path] = path
  end

  opts.on('--team_regex STRING',
          'Regex that will identify the team name [default: "[A-Za-z]+"]') do |team_regex|
    options[:team_regex] = team_regex
  end

  opts.on('--top_contributing_team STRING',
          'Limit of top contributed to the directory teams in codeownership data [default: 5]') do |top_contributing_team|
    options[:top_contributing_team] = top_contributing_team
  end

  opts.on('--top_touched_files STRING',
          'Limit of top touched files by individual contributors in codeownership data [default: 5]') do |top_touched_files|
    options[:top_touched_files] = top_touched_files
  end

  opts.on('--codeowners_path STRING',
          'Path to CODEOWNERS file [default: .github/CODEOWNERS]') do |codeowners_path|
    options[:codeowners_path] = codeowners_path
  end

  opts.on('-h', '--help', 'Display this help message') do
    puts opts
    puts <<~EXAMPLES

      Examples:

      git_ownership_insights --path src/test --exclusions WEB,RAILS --steps 2 --duration_in_days 90 --debug
    EXAMPLES
    exit
  end
end.parse!

# Team names are upcased before they are compared with the exclusions, so the
# exclusions are normalised the same way (otherwise "web" could never match).
EXCLUSIONS = options[:exclusions]&.split(',')&.map { |name| name.strip.upcase }
REPO_PATH = options[:path] || '.'
TEAM_REGEX = options[:team_regex] || '[A-Za-z]+'
TOP_TOUCHED_FILES = options[:top_touched_files] || 5
TOP_CONTRIBUTED_TEAMS = options[:top_contributing_team] || 5
CODEOWNERS_PATH = options[:codeowners_path] || '.github/CODEOWNERS'

# Runs `git` with the given arguments and returns its stdout as chomped lines.
# The command is passed as an argv array, so no shell is involved and paths
# containing spaces, quotes or `$(...)` are handed to git verbatim.
#
# @param args [Array<String>] arguments following `git`
# @return [Array<String>]
def git_lines(*args)
  IO.popen(['git', *args]) { |io| io.readlines(chomp: true) }
end

# Parses a CODEOWNERS file into a pattern => owners map.
#
# Blank lines and comment lines are skipped, an inline `# comment` after the
# owners is dropped, and one leading '@' is removed from every owner.
#
# @param path [String] location of the CODEOWNERS file
# @return [Hash{String => String}] pattern mapped to its space-joined owners
# @raise [RuntimeError] when the file does not exist
def read_codeowners_file(path = CODEOWNERS_PATH)
  raise "CODEOWNERS file does not exist under #{path}" unless File.exist?(path)

  File.foreach(path).each_with_object({}) do |raw_line, codeowners|
    line = raw_line.strip # tolerate indented entries and indented comments
    next if line.empty? || line.start_with?('#')

    pattern, *owners = line.split
    owners = owners.take_while { |token| !token.start_with?('#') }
    codeowners[pattern] = owners.map { |owner| owner.delete_prefix('@') }.join(' ')
  end
end

# Finds the owners of a file using the longest CODEOWNERS pattern that occurs
# in the file path.
#
# Matching is a plain substring test on the pattern with leading slashes and a
# trailing slash removed; glob characters such as `*` are NOT expanded.
#
# @param file_path [String] repository-relative file path
# @param codeowners [Hash{String => String}] result of {#read_codeowners_file}
# @return [Array<String>] owners of the best match, or ['unknown'] when no pattern matches
def find_owners(file_path, codeowners)
  best_match = codeowners.keys
                         .select { |pattern| file_path.include?(pattern.sub(%r{\A/+}, '').chomp('/')) }
                         .max_by(&:length)
  return ['unknown'] if best_match.nil?

  codeowners[best_match].split
end

# Counts the files tracked under +directory_path+ in the last commit made
# before +end_date+ (epoch seconds). Returns 0 when no such commit exists.
def tracked_file_count(directory_path, end_date)
  revision = git_lines('rev-list', '-1', "--before=#{end_date}", 'HEAD').first
  return 0 if revision.nil? || revision.empty?

  git_lines('ls-tree', '-r', '--name-only', revision, '--', directory_path).size
end

# Collects, for every given file, the teams found in its commit subjects within
# the git date range, minus the excluded teams.
#
# @return [Hash] :file_teams (path => unique teams), :all_teams (one entry per
#   attributed commit) and :multi_team_files (files touched by more than one team)
def collect_team_stats(files, date_range, debug)
  team_pattern = Regexp.new(TEAM_REGEX)
  stats = { file_teams: {}, all_teams: [], multi_team_files: 0 }

  files.each do |file|
    # tformat terminates every commit with a newline, so lines == commits.
    subjects = git_lines('log', '--pretty=tformat:%s', *date_range, '--follow', '--', file)
    next if subjects.empty?

    # Subjects that do not match the team regex are skipped instead of crashing.
    teams = subjects.filter_map { |subject| subject[team_pattern]&.upcase }
                    .reject { |team| EXCLUSIONS&.include?(team) }
    uniq_teams = teams.uniq

    stats[:all_teams].concat(teams)
    stats[:multi_team_files] += 1 if uniq_teams.count > 1
    # Keyed by full path: basenames collide across directories.
    stats[:file_teams][file] = uniq_teams

    puts "\n#{File.basename(file)} [#{subjects.size}]:#{uniq_teams}\n" if debug
  end

  stats
end

# Prints the ownership section: the most touched files grouped by their
# CODEOWNERS owner and directory, limited to the busiest owners.
#
# @param touch_counts [Array<Array(String, Integer)>] [path, touches] pairs, busiest first
# @param file_teams [Hash{String => Array<String>}] contributing teams per path
# @raise [RuntimeError] when the CODEOWNERS file does not exist
def print_codeownership(touch_counts, file_teams)
  codeowners = read_codeowners_file
  owners_data = Hash.new do |hash, owner|
    hash[owner] = { directories: Hash.new { |dirs, dir| dirs[dir] = { files: [] } }, total_count: 0 }
  end

  touch_counts.each do |file, count|
    find_owners(file, codeowners).each do |owner|
      entry = owners_data[owner]
      entry[:total_count] += count
      entry[:directories][File.dirname(file)][:files] <<
        { name: File.basename(file), count: count, teams: file_teams.fetch(file, []) }
    end
  end

  # Descending by touches, so the busiest owners are at the front of the list.
  top_owners = owners_data.sort_by { |_, data| -data[:total_count] }.first(TOP_CONTRIBUTED_TEAMS.to_i)

  puts ' Codeownership data:'
  top_owners.each do |owner, data|
    puts " #{owner.split('/').last}:\n Total Count: #{data[:total_count]}"
    data[:directories].each do |dir, dir_data|
      puts " Directory: #{dir}\n Top files:"
      dir_data[:files].each do |file_data|
        teams = file_data[:teams]
        puts " #{file_data[:name]} - #{file_data[:count]} #{teams.empty? ? '[ Excluded contributor ]' : teams}"
      end
    end
  end
end

# Prints the contribution and ownership report for the single window of
# +duration_in_days+ days that ends at +begin_time+.
def report_timeframe(directory_path:, duration_in_days:, begin_time:, debug:)
  end_date = begin_time.to_time.to_i
  start_date = end_date - (duration_in_days * 86_400)
  date_range = ["--since=#{start_date}", "--until=#{end_date}"]

  file_count = tracked_file_count(directory_path, end_date)
  # One line per touched path; split by line so paths with spaces stay intact.
  files_with_changes = git_lines('log', '--name-only', '--pretty=format:', *date_range, '--', directory_path)
                       .reject(&:empty?).sort
  changed_files = files_with_changes.uniq

  stats = collect_team_stats(changed_files, date_range, debug)
  contributors = stats[:all_teams].tally.sort_by { |team, count| [-count, team] }.to_h

  # Share of distinct changed files touched by at most one team; with no
  # changed files there are no multi-team files either, hence 100%.
  single_contributor_share =
    if changed_files.empty?
      100.0
    else
      (100 - ((stats[:multi_team_files].to_f / changed_files.count) * 100)).round(2)
    end

  puts "Timeframe: #{(begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{begin_time.strftime('%Y-%m-%d')}\n " \
       "Files with a single contributor: #{single_contributor_share}%\n " \
       "Amount of commits: #{stats[:all_teams].count}\n " \
       "Total files changed: #{changed_files.count}\n " \
       "Total files in the folder: #{file_count}\n " \
       "Contributors: #{contributors}\n"

  # A path appears once per commit that touched it, so the tally is the touch count.
  top_touched_files = files_with_changes.tally
                                        .sort_by { |file, count| [-count, file] }
                                        .take(TOP_TOUCHED_FILES.to_i)
  print_codeownership(top_touched_files, stats[:file_teams])
end

# Prints the contribution report for +steps+ consecutive windows, starting with
# the window that ends at +begin_time+ and moving further into the past.
#
# @param directory_path [String] directory or file to analyse
# @param duration_in_days [Integer, String] length of every window in days
# @param begin_time [DateTime] end of the most recent window; must support
#   `- days` arithmetic and `#to_time`
# @param debug [Boolean, nil] print per-file details when truthy
# @param steps [Integer, nil] number of windows; nil, 0 and 1 all report one window
# @return [nil]
# @raise [RuntimeError] when the CODEOWNERS file does not exist
def contribution_message(directory_path:, duration_in_days:, begin_time:, debug: nil, steps: nil)
  duration_in_days = duration_in_days.to_i
  windows = [steps.to_i, 1].max

  windows.times do |step|
    report_timeframe(directory_path: directory_path,
                     duration_in_days: duration_in_days,
                     begin_time: begin_time - (step * duration_in_days),
                     debug: debug)
  end
  nil
end

duration_in_days = options[:duration_in_days] || DEFAULT_DURATION_IN_DAYS
steps = options[:steps] || DEFAULT_STEPS

puts "\nDirectory: #{REPO_PATH}\n"
puts "Time period that data is aggregated by: #{duration_in_days} days"
puts "Steps to jump in the past: #{steps}"
puts "Limit of the teams shown in codeownership data: #{TOP_CONTRIBUTED_TEAMS}"
puts "Limit of the files shown in codeownership data: #{TOP_TOUCHED_FILES}"
puts "Regex to detect the teams identifiers: #{TEAM_REGEX}"
puts "Excluded contributors: #{EXCLUSIONS}\n" if EXCLUSIONS
puts "Debug mode is: #{options[:debug] ? 'on' : 'off'}\n\n"

contribution_message(duration_in_days: duration_in_days,
                     directory_path: REPO_PATH,
                     begin_time: DateTime.now,
                     steps: steps.to_i,
                     debug: options[:debug])