#!/usr/bin/env ruby
# frozen_string_literal: true

# git_ownership_insights
#
# Prints contribution statistics for a path inside a git repository: how many
# code files were touched in a time window, which team identifiers (extracted
# from commit subjects with a regex) touched them, which big files were touched
# by more than one team ("hotspots"), and optionally a CODEOWNERS breakdown.
#
# The script checks out the default branch, pulls it, reports on the most
# recent window and then, for every additional step, checks out the last
# revision before the previous window and reports again.
#
# NOTE(review): file paths reported by git are relative to the repository
# root, while File.exist? / File.foreach resolve against the current working
# directory, so this presumably has to be run from the repository root.

require 'date'
require 'optparse'
require 'pry'

options = {}
OptionParser.new do |opts|
  opts.banner = 'Usage: git_ownership_insights [options]'

  opts.on('--debug', 'Enable debug mode') do
    options[:debug] = true
  end

  opts.on('--ci', 'Do not print the info messages for better CI text parsing [default: false]') do
    options[:ci] = true
  end

  opts.on('--codeowners', 'Print CODEOWNERS info [default: false]') do
    options[:codeowners] = true
  end

  opts.on('--hotspot-files', 'Print the found hotspot files (big files touched by many) [default: false]') do
    options[:hotspot_files] = true
  end

  opts.on('--excluded-contributors STRING',
          'Comma-delimited list of excluded contributors [example: WEB,RAILS,MOBILE]') do |exclusions|
    options[:exclusions] = exclusions
  end

  opts.on('--excluded-files STRING',
          'Comma-delimited list of excluded files [example: ViewController,AppDelegate.swift]') do |excluded_files|
    options[:excluded_files] = excluded_files
  end

  opts.on('--steps STRING', 'Number of steps the script will go into the past [default: 1]') do |steps|
    options[:steps] = steps
  end

  opts.on('--duration-in-days STRING',
          'Number of days to aggregate the changes for [default: 30]') do |duration_in_days|
    options[:duration_in_days] = duration_in_days
  end

  opts.on('--path STRING', 'Path to the directory or file to calculate the ownership [default: "."]') do |path|
    options[:path] = path
  end

  opts.on('--team-regex STRING', 'Regex that will identify the team name [default: "[A-Za-z]+"]') do |team_regex|
    options[:team_regex] = team_regex
  end

  opts.on('--top-contributing-team STRING',
          'Limit of top contributed to the directory teams in codeownership data [default: 5]') do |top_contributing_team|
    options[:top_contributing_team] = top_contributing_team
  end

  opts.on('--top-touched-files STRING',
          'Limit of top touched files by individual contributors in codeownership data [default: 5]') do |top_touched_files|
    options[:top_touched_files] = top_touched_files
  end

  opts.on('--codeowners-path STRING', 'Path to CODEOWNERS file [default: .github/CODEOWNERS]') do |codeowners_path|
    options[:codeowners_path] = codeowners_path
  end

  opts.on('--big-file-size STRING',
          'The amount of lines in the file to be considered big [default: 250]') do |big_file_size|
    options[:big_file_size] = big_file_size
  end

  opts.on('--default-branch STRING',
          'The default branch to pull and run metrics for [default: master]') do |default_branch|
    options[:default_branch] = default_branch
  end

  opts.on('--code-extensions STRING',
          'The file extensions that consider to be code [default: ".kt, .swift"]') do |code_extension|
    options[:code_extension] = code_extension
  end

  opts.on('-h', '--help', 'Display this help message') do
    puts opts
    puts <<~EXAMPLES

      Examples:
        git_ownership_insights --path src/test --excluded-contributors WEB,RAILS --steps 2 --duration-in-days 90 --hotspot-files --debug
    EXAMPLES
    exit
  end
end.parse!

# Option values arrive as strings; numeric ones are coerced once here so that
# later comparisons (e.g. line count > BIG_FILE_SIZE) never mix Integer/String.
EXCLUSIONS = options[:exclusions]&.split(',')
REPO_PATH = options[:path] || '.'
TEAM_REGEX = options[:team_regex] || '[A-Za-z]+'
TEAM_PATTERN = Regexp.new(TEAM_REGEX)
TOP_TOUCHED_FILES = (options[:top_touched_files] || 5).to_i
TOP_CONTRIBUTED_TEAMS = (options[:top_contributing_team] || 5).to_i
CODEOWNERS_PATH = options[:codeowners_path] || '.github/CODEOWNERS'
BIG_FILE_SIZE = (options[:big_file_size] || 250).to_i
CI = options[:ci] || false
DEFAULT_BRANCH = options[:default_branch] || 'master'
CODEOWNERS = options[:codeowners] || false
HOTSPOT = options[:hotspot_files] || false
# Accept both the documented comma-separated form (".kt, .swift") and a
# whitespace-separated list.
CODE_EXTENSIONS = if options[:code_extension]
                    options[:code_extension].split(/[,\s]+/).reject(&:empty?)
                  else
                    ['.swift', '.kt']
                  end
EXCLUDED_FILES = options[:excluded_files]
DURATION_IN_DAYS = (options[:duration_in_days] || 30).to_i
STEPS = (options[:steps] || 1).to_i

# Lines that are blank, start with `//`, or start with a one-line `/* ... */`
# comment; these are not counted towards a file's size.
COMMENT_OR_BLANK_LINE = %r{^\s*(//|/\*.*\*/|\s*$)}.freeze

# @return [Boolean] whether the string form of +obj+ is "true" (case-insensitive)
def true?(obj)
  obj.to_s.downcase == 'true'
end

# Runs git with the given arguments (no shell involved, so paths containing
# spaces, quotes or `$` are passed through verbatim).
#
# @param args [Array<String>] git sub-command and its arguments
# @return [String] whatever git wrote to standard output
def git_output(*args)
  IO.popen(['git', *args], &:read)
end

# @param path [String]
# @return [Boolean] whether the file extension is listed in CODE_EXTENSIONS
def code_file?(path)
  CODE_EXTENSIONS.include?(File.extname(path))
end

# @param path [String] path of an existing file
# @return [Integer] number of lines that are neither blank nor comment lines
def significant_line_count(path)
  File.foreach(path).count { |line| !line.match?(COMMENT_OR_BLANK_LINE) }
end

# @param part [Numeric]
# @param total [Numeric]
# @return [Float] part/total as a percentage; 0.0 when total is zero
def percentage(part, total)
  return 0.0 if total.zero?

  part.to_f / total * 100
end

# Parses the CODEOWNERS file into a pattern => owners mapping.
#
# Blank lines and lines whose first non-blank character is `#` are skipped.
# A leading `@` is removed from every owner; multiple owners are joined with
# a single space.
#
# @return [Hash{String => String}]
# @raise [RuntimeError] when CODEOWNERS_PATH does not exist
def read_codeowners_file
  raise "CODEOWNERS file does not exist under #{CODEOWNERS_PATH}" unless File.exist?(CODEOWNERS_PATH)

  File.foreach(CODEOWNERS_PATH).each_with_object({}) do |raw_line, codeowners|
    line = raw_line.strip
    next if line.empty? || line.start_with?('#')

    pattern, *owners = line.split(/\s+/)
    codeowners[pattern] = owners.map { |owner| owner.delete_prefix('@') }.join(' ')
  end
end

# Turns a CODEOWNERS pattern into a prefix regex: leading slashes and one
# trailing slash are dropped, everything is escaped, and each `*` becomes `.*`.
#
# @param pattern [String]
# @return [Regexp]
def codeowners_pattern_regex(pattern)
  normalized = pattern.sub(%r{^/+}, '').chomp('/')
  Regexp.new("\\A#{Regexp.escape(normalized).gsub('\*', '.*')}")
end

# Finds the owners of a file using the longest matching CODEOWNERS pattern.
#
# NOTE(review): "longest pattern wins" is what this script implements; it is
# not necessarily the precedence rule of the hosting platform — confirm.
#
# @param file_path [String]
# @param codeowners [Hash{String => String}] result of read_codeowners_file
# @return [Array<String>] owners, or ['unknown'] when no pattern matches
def find_owners(file_path, codeowners)
  best_match = codeowners.keys
                         .select { |pattern| codeowners_pattern_regex(pattern).match?(file_path) }
                         .max_by(&:length)
  return ['unknown'] unless best_match

  codeowners[best_match].split
end

# Prints how many code files under +directory_path+ have more than +size+
# significant (non-blank, non-comment) lines.
#
# @param directory_path [String]
# @param size [Integer, String] line limit; coerced with to_i
# @return [nil]
def count_big_files(directory_path, size: BIG_FILE_SIZE)
  limit = size.to_i
  count = Dir.glob(File.join(directory_path, '**', '*')).count do |path|
    File.file?(path) && code_file?(path) && significant_line_count(path) > limit
  end

  puts " Total number of files longer than #{size} lines: #{count}"
end

# Counts the files tracked under +directory_path+ at the newest commit inside
# the window.
#
# @return [Integer] 0 when the window contains no commit
def tracked_file_count(directory_path, start_date, end_date)
  revision = git_output('rev-list', '-1', "--since=#{start_date}", "--until=#{end_date}", 'HEAD').strip
  return 0 if revision.empty?

  git_output('ls-tree', '-r', '--name-only', revision, '--', directory_path).lines.count
end

# Lists code files touched inside the window, one entry per commit that
# touched them (so the list may contain duplicates), minus excluded files.
#
# @return [Array<String>] sorted paths as reported by git
def changed_code_files(directory_path, start_date, end_date)
  # Split on newlines only, so file names containing spaces stay intact.
  touched = git_output('log', '--name-only', '--pretty=format:', "--since=#{start_date}",
                       "--until=#{end_date}", '--', directory_path).split("\n").reject(&:empty?).sort
  excluded_patterns = EXCLUDED_FILES ? EXCLUDED_FILES.split(',') : []

  touched.select { |path| code_file?(path) }
         .reject { |path| excluded_patterns.any? { |pattern| path.include?(pattern) } }
end

# Reads the history of a single file inside the window.
#
# @return [Array(Integer, Array<String>)] commit count and commit subjects
def file_history(file, start_date, end_date)
  selection = ["--since=#{start_date}", "--until=#{end_date}", '--follow', '--', file]
  commit_count = git_output('log', '--pretty=format:%H', *selection).lines.count
  subjects = git_output('log', '--pretty=format:%s', *selection).split("\n")
  [commit_count, subjects]
end

# Extracts the upper-cased team identifier from each commit subject. Subjects
# that do not match TEAM_REGEX are ignored; excluded contributors are dropped.
#
# @param subjects [Array<String>]
# @return [Array<String>] one entry per matching commit (duplicates kept)
def team_identifiers(subjects)
  subjects.filter_map { |subject| subject[TEAM_PATTERN]&.upcase }
          .reject { |team| EXCLUSIONS&.include?(team) }
end

# Path shown in the hotspot list: the file path with the analysed directory
# prefix removed. Nothing is stripped for the default path ".".
def display_path(file_path, directory_path)
  return file_path if directory_path == '.'

  file_path.delete_prefix(directory_path)
end

# Prints the hotspot files: files from +file_team_map+ that still exist and
# have more than BIG_FILE_SIZE significant lines, most committed first.
#
# @param file_team_map [Hash{String => Array(Array<String>, Integer)}]
#   file path => [teams, commit count]
def print_hotspots(file_team_map, directory_path)
  hotspot_files = file_team_map.select do |file_path, _|
    File.exist?(file_path) && significant_line_count(file_path) > BIG_FILE_SIZE
  end
  sorted_hotspots = hotspot_files.sort_by { |file_path, (_teams, commits)| [-commits, file_path] }

  count_big_files(directory_path)
  puts " Total files longer than #{BIG_FILE_SIZE} lines with multiple contributors: #{sorted_hotspots.count}\n"
  return unless HOTSPOT

  sorted_hotspots.each do |file_path, (teams, commits)|
    puts " #{display_path(file_path, directory_path)} Contributors: #{teams} Commits: #{commits}"
  end
end

# Prints, for the TOP_CONTRIBUTED_TEAMS owners with the highest churn, the
# directories they own and up to TOP_TOUCHED_FILES most committed files in
# each directory.
#
# @param file_team_map [Hash{String => Array(Array<String>, Integer)}]
#   file path => [teams, commit count]
def print_codeowners_report(file_team_map)
  codeowners = read_codeowners_file
  owners_data = Hash.new do |hash, owner|
    hash[owner] = { directories: Hash.new { |dirs, dir| dirs[dir] = { files: [] } }, churn_count: 0 }
  end

  file_team_map.each do |file, (teams, commits)|
    find_owners(file, codeowners).each do |owner|
      owners_data[owner][:churn_count] += commits
      # Teams travel with the entry so that equally named files in different
      # directories cannot be confused with each other.
      owners_data[owner][:directories][File.dirname(file)][:files] <<
        { name: File.basename(file), teams: teams, commits: commits }
    end
  end

  # Descending by churn, so the top owners are at the front.
  top_owners = owners_data.sort_by { |_, data| -data[:churn_count] }.first(TOP_CONTRIBUTED_TEAMS)

  puts ' Codeownership data:'
  top_owners.each do |owner, data|
    puts " #{owner.split('/').last}:\n Total Count: #{data[:churn_count]}"
    data[:directories].each do |dir, dir_data|
      puts " Directory: #{dir}\n Top files:"
      top_files = dir_data[:files].sort_by { |entry| [-entry[:commits], entry[:name]] }.first(TOP_TOUCHED_FILES)
      top_files.each do |entry|
        contributors = entry[:teams].empty? ? ['Excluded contributor'] : entry[:teams]
        puts " #{entry[:name]} - #{entry[:commits]} #{contributors}"
      end
    end
  end
end

# Checks out the newest revision committed before +date+; does nothing when
# there is none.
def checkout_revision_before(date)
  revision = git_output('rev-list', '-1', "--before=#{date.strftime('%B %d %Y')}", 'HEAD').strip
  return if revision.empty?

  system('git', 'checkout', revision, %i[out err] => File::NULL)
end

# Prints the contribution report for the window that ends at +begin_time+ and
# spans +duration_in_days+ days, then repeats for earlier windows while steps
# remain (checking out the last revision before each earlier window).
#
# @param directory_path [String] directory or file to analyse
# @param duration_in_days [Integer, String] window length; coerced with to_i
# @param begin_time [DateTime] end of the window (Date arithmetic in days is
#   used on it)
# @param debug [Boolean, nil] print per-file details when truthy
# @param steps [Integer, nil] number of windows to report, counting this one
# @return [nil]
def contribution_message(directory_path:, duration_in_days:, begin_time:, debug: nil, steps: nil)
  duration_in_days = duration_in_days.to_i
  end_date = begin_time.to_time.to_i
  start_date = end_date - duration_in_days * 86_400

  file_count = tracked_file_count(directory_path, start_date, end_date)
  code_files_with_changes = changed_code_files(directory_path, start_date, end_date)

  all_teams = []
  total_changes = 0
  file_team_map = {} # only files touched by more than one team end up here

  code_files_with_changes.uniq.each do |file|
    commit_count, subjects = file_history(file, start_date, end_date)
    teams = team_identifiers(subjects)

    total_changes += commit_count
    all_teams.concat(teams)

    teams = teams.uniq
    file_team_map[file] = [teams, commit_count] if teams.count > 1

    puts "\n#{File.basename(file)} [#{commit_count}]:#{teams}\n" if debug
  end

  contributors = all_teams.tally.sort_by { |team, count| [-count, team] }.to_h
  churn_count = file_team_map.each_value.sum(&:last)
  # NOTE(review): code_files_with_changes holds one entry per commit touching
  # a file, while file_team_map holds unique files — confirm that the
  # non-unique count is the intended denominator / "Total files changed".
  multi_team_share = percentage(file_team_map.size, code_files_with_changes.count)

  puts "Timeframe: #{(begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{begin_time.strftime('%Y-%m-%d')}"
  puts " Code files with a single contributor: #{(100 - multi_team_share).round(2)}%"
  puts " Hotspot code changes: #{churn_count} (#{percentage(churn_count, total_changes).round(2)}%)"
  puts " Amount of code changes: #{total_changes}"
  puts " Total files changed: #{code_files_with_changes.count}"
  puts " Total files in the folder: #{file_count}"
  puts " Contributors: #{contributors}"

  print_hotspots(file_team_map, directory_path)
  puts "\n\n"

  print_codeowners_report(file_team_map) if CODEOWNERS

  remaining_steps = steps.to_i - 1
  return unless remaining_steps.positive?

  previous_window_end = begin_time - duration_in_days
  checkout_revision_before(previous_window_end)
  contribution_message(duration_in_days: duration_in_days, directory_path: directory_path,
                       begin_time: previous_window_end, steps: remaining_steps, debug: debug)
end

unless CI
  puts "\nDirectory: #{REPO_PATH}\n"
  puts "Time period that data is aggregated by: #{DURATION_IN_DAYS} days"
  puts "Steps to jump in the past: #{STEPS}"
  puts "Runs against: #{DEFAULT_BRANCH}"
  puts "Code extensions: #{CODE_EXTENSIONS}"
  puts "Regex to detect the teams identifiers: #{TEAM_REGEX}"
  puts "Excluded contributors: #{EXCLUSIONS}\n" if EXCLUSIONS
  puts "Excluded file patterns: #{EXCLUDED_FILES.split(',')}\n" if EXCLUDED_FILES
  puts "Lines of code limit (big files) for the hotspot calculation: #{BIG_FILE_SIZE}"
  puts "Hotspot detailed output is: #{options[:hotspot_files] ? 'on' : 'off'}\n"
  puts "CODEOWNERS output is: #{options[:codeowners] ? 'on' : 'off'}\n"
  puts "Limit of the teams shown in codeownership data: #{TOP_CONTRIBUTED_TEAMS}"
  puts "Limit of the files shown in codeownership data: #{TOP_TOUCHED_FILES}"
  puts "CI mode is: #{options[:ci] ? 'on' : 'off'}\n"
  puts "Debug mode is: #{options[:debug] ? 'on' : 'off'}\n\n"
end

system('git', 'checkout', DEFAULT_BRANCH, out: File::NULL)
system('git', 'pull', out: File::NULL)

contribution_message(duration_in_days: DURATION_IN_DAYS, directory_path: REPO_PATH, begin_time: DateTime.now,
                     steps: STEPS, debug: options[:debug])