# frozen_string_literal: true

begin
  require 'pry'
rescue LoadError
  # Nothing in this file references pry, so its absence must not prevent the class from loading.
end
require 'date'

# Prints contribution / ownership metrics for a directory of a git repository.
#
# The class reads several constants that are not defined in this file and must
# be provided by the caller before the corresponding methods run:
# CODEOWNERS_PATH, BIG_FILE_SIZE, CODE_EXTENSIONS, EXCLUDED_FILES, TEAM_REGEX,
# EXCLUSIONS, HOTSPOT, CODEOWNERS and TOP_CONTRIBUTED_TEAMS.
class GitOwnershipInsights
  # Lines that are blank, or that (after optional indentation) begin with `//`
  # or with a single-line `/* ... */` comment. Such lines are not counted as code.
  NON_CODE_LINE_PATTERN = %r{^\s*(//|/\*.*\*/|\s*$)}

  # @param directory_path [String] directory (relative to the git work tree) to analyse
  # @param duration_in_days [#to_i] length of the analysed window in days
  # @param begin_time [Date] reference date; must respond to `to_time`, `-` and `strftime`
  # @param debug [Object, nil] when truthy, per-file details are printed
  # @param steps [Integer] how many consecutive windows to report
  def initialize(directory_path:, duration_in_days:, begin_time:, debug: nil, steps: 1)
    @directory_path = directory_path
    @duration_in_days = duration_in_days
    @begin_time = begin_time
    @debug = debug
    @steps = steps
  end

  # @return [Boolean] whether the string form of +obj+ is "true" (case-insensitive)
  def true?(obj)
    obj.to_s.downcase == 'true'
  end

  # Parses the file at CODEOWNERS_PATH.
  #
  # @return [Hash{String => String}] pattern => space-separated owners with a leading '@' removed
  # @raise [RuntimeError] when the file does not exist
  def read_codeowners_file
    raise "CODEOWNERS file does not exist under #{CODEOWNERS_PATH}" unless File.exist?(CODEOWNERS_PATH)

    File.readlines(CODEOWNERS_PATH).each_with_object({}) do |line, codeowners|
      # Strip first so that indented comments and indented entries are handled like unindented ones.
      entry = line.strip
      next if entry.empty? || entry.start_with?('#')

      pattern, *owners = entry.split
      codeowners[pattern] = owners.map { |owner| owner.delete_prefix('@') }.join(' ')
    end
  end

  # Finds the owners of +file_path+ using the longest matching pattern.
  #
  # @param file_path [String] path to look up
  # @param codeowners [Hash{String => String}] mapping as returned by #read_codeowners_file
  # @return [Array<String>] owners of the longest matching pattern, or ['unknown'] when nothing matches
  def find_owners(file_path, codeowners)
    matching_patterns = codeowners.keys.select { |pattern| file_path.match?(codeowners_pattern_regex(pattern)) }
    return ['unknown'] if matching_patterns.empty?

    # Pattern length is used as the measure of specificity.
    codeowners[matching_patterns.max_by(&:length)].split
  end

  # Prints how many code files under +directory_path+ have more than +size+ code lines.
  #
  # @param directory_path [String] directory searched recursively
  # @param size [#to_i] line threshold
  def count_big_files(directory_path, size: BIG_FILE_SIZE)
    size = size.to_i
    files = Dir.glob(File.join(directory_path, '**', '*')).select { |file| File.file?(file) }
    count = code_files(files).count { |file| code_line_count(file) > size }

    puts " *Current(*) total number of code files longer than #{size} lines:* #{count}"
  end

  # Prints the total number of code lines in the code files among +files+.
  #
  # @param files [Array<String>] file paths
  def count_hotspot_lines(files)
    count = code_files(files).sum { |file| code_line_count(file) }

    puts " *Total lines of hotspot code:* #{count}"
  end

  # Keeps the paths that exist, do not contain any of the comma-separated
  # EXCLUDED_FILES fragments, and whose extension is listed in CODE_EXTENSIONS.
  #
  # @param files [Array<String>] candidate paths
  # @return [Array<String>]
  def filter_existing_code_files(files)
    excluded_patterns = EXCLUDED_FILES ? EXCLUDED_FILES.split(',') : []

    files.select do |file|
      File.exist?(file) &&
        excluded_patterns.none? { |pattern| file.include?(pattern) } &&
        CODE_EXTENSIONS.include?(File.extname(file))
    end
  end

  # NOTE(review): the git commands below interpolate paths into a shell command line;
  # paths containing `"`, `$` or backticks would be interpreted by the shell.

  # @return [String] raw `git ls-tree` output listing the files tracked at HEAD under +directory_path+
  def git_files(directory_path:)
    `git ls-tree -r --name-only $(git rev-list -1 HEAD) -- "#{directory_path}"`
  end

  # @return [String] raw `git log --name-only` output for the given window
  def files_with_changes(directory_path:, start_date:, end_date:)
    `git log --name-only --pretty=format:"" --since="#{start_date}" --until="#{end_date}" "#{directory_path}"`
  end

  # @return [String] number of commits touching +file+ in the window, as printed by `grep -c`
  def git_commit_count(file:, start_date:, end_date:)
    `git log --since="#{start_date}" --until="#{end_date}" --follow -- "#{file}" | grep -c '^commit'`
  end

  # @return [String] newline-separated commit subjects for +file+ in the window
  def git_commit_info(file:, start_date:, end_date:)
    `git log --pretty=format:"%s" --since="#{start_date}" --until="#{end_date}" --follow -- "#{file}"`
  end

  # Collects the metrics for the current window and prints the report. While
  # more steps remain, checks out the revision preceding the window, moves
  # @begin_time back by the window length and reports again.
  def contribution_message
    duration_in_days = @duration_in_days.to_i
    # NOTE(review): the queried window is shifted 30 days before @begin_time, while the
    # printed timeframe is not shifted - confirm that this offset is intentional.
    end_date = @begin_time.to_time.to_i - 30 * 86_400
    start_date = end_date - duration_in_days * 86_400

    # Split git output per line (not on whitespace) so that paths containing spaces stay intact.
    tracked_paths = path_lines(git_files(directory_path: @directory_path))
    file_count = filter_existing_code_files(tracked_paths).count
    changed_paths = path_lines(files_with_changes(directory_path: @directory_path, start_date:, end_date:)).sort
    uniq_code_files_with_changes = filter_existing_code_files(changed_paths).uniq

    all_teams = []
    cross_teams_count = 0
    single_ownership_teams_count = 0
    total_changes = 0
    file_team_map = {} # path => [unique teams, commit count], only for files touched by several teams

    uniq_code_files_with_changes.each do |file|
      commit_count = git_commit_count(file:, start_date:, end_date:).to_i
      subjects = git_commit_info(file:, start_date:, end_date:).split("\n")
      teams = teams_from_commit_subjects(subjects)

      total_changes += commit_count
      all_teams << teams
      teams = teams.uniq

      if teams.count > 1
        file_team_map[file.to_s] = [teams, commit_count]
        cross_teams_count += teams.count
      else
        single_ownership_teams_count += 1
      end

      puts "\n#{File.basename(file)} [#{commit_count}]:#{teams}\n" if @debug
    end

    files_changed_by_many_teams = file_team_map.count
    contributors = all_teams.flatten.tally.sort_by { |team, count| [-count, team] }.to_h
    churn_count = file_team_map.values.sum { |_teams, commits| commits }
    hotspot_changes_percentage = percentage(churn_count, total_changes)
    multi_team_files_percentage = percentage(files_changed_by_many_teams, file_count)

    # Multi-team files with more than BIG_FILE_SIZE code lines.
    filtered_files = file_team_map.select do |file_path, _stats|
      File.exist?(file_path) && code_line_count(file_path) > BIG_FILE_SIZE.to_i
    end
    filtered_top_touched_files = filtered_files.sort_by { |file_path, stats| [-stats.last, file_path] }

    puts ''
    puts "*Timeframe:* #{(@begin_time - duration_in_days).strftime('%Y-%m-%d')} to #{@begin_time.strftime('%Y-%m-%d')}"
    puts " *Code files with a single contributor:* #{(100 - multi_team_files_percentage).round(2)}%"
    puts " *Existing files changed by many teams:* #{files_changed_by_many_teams}"
    puts " *Current existing #{CODE_EXTENSIONS} files:* #{file_count}"
    puts ' *Cross-Squad Dependency:*'
    puts " *Contributions by multiple squads to the same files:* #{cross_teams_count}"
    puts " *Contributions by single squads contributing to single files:* #{single_ownership_teams_count}"
    puts " *Hotspot Code Changes:* #{hotspot_changes_percentage.round(2)}%"
    puts " *Churn count(commits to files by multiple teams):* #{churn_count}"
    puts " *Total amount of commits:* #{total_changes}"
    count_hotspot_lines(filtered_files.keys)
    puts " *#{CODE_EXTENSIONS} files with multiple contributors:* #{file_team_map.count}"
    puts " *#{CODE_EXTENSIONS} files exceeding #{BIG_FILE_SIZE} lines with multiple contributors:* #{filtered_top_touched_files.count}"
    puts " *Total amount of commits to #{CODE_EXTENSIONS} files:* #{total_changes}"
    puts " *Total #{CODE_EXTENSIONS} files changed:* #{uniq_code_files_with_changes.count}"
    count_big_files(@directory_path)
    puts " *Current(*) total of #{CODE_EXTENSIONS} files in the folder:* #{file_count}"
    puts " *Contributors:* #{contributors}"
    puts "* means that it the current(instant) repository value, all the other metrics are done over #{duration_in_days} days period"

    print_hotspot_changes(filtered_top_touched_files) if HOTSPOT
    print_codeowners_report(file_team_map) if CODEOWNERS

    @steps -= 1
    return unless @steps.positive?

    system("git checkout `git rev-list -1 --before='#{(@begin_time - duration_in_days).strftime('%B %d %Y')}' HEAD`",
           %i[out err] => File::NULL)
    @begin_time -= duration_in_days
    contribution_message
  end

  private

  # Builds the prefix regex for a CODEOWNERS pattern: leading slashes and one
  # trailing slash are dropped, and every `*` matches any run of characters.
  def codeowners_pattern_regex(pattern)
    escaped = Regexp.escape(pattern.sub(%r{^/+}, '').chomp('/'))
    Regexp.new("^#{escaped.gsub('\*', '.*')}")
  end

  # Selects the paths whose extension is listed in CODE_EXTENSIONS.
  def code_files(files)
    files.select { |file| CODE_EXTENSIONS.include?(File.extname(file)) }
  end

  # Counts the lines of +file+ that do not match NON_CODE_LINE_PATTERN.
  def code_line_count(file)
    File.foreach(file).count { |line| !line.match?(NON_CODE_LINE_PATTERN) }
  end

  # Splits git output into its non-empty lines.
  def path_lines(output)
    output.lines(chomp: true).reject(&:empty?)
  end

  # Extracts the upper-cased TEAM_REGEX match from each commit subject, skipping
  # subjects without a match and teams listed in EXCLUSIONS.
  def teams_from_commit_subjects(subjects)
    team_regex = /#{TEAM_REGEX}/
    subjects.filter_map { |subject| subject[team_regex]&.upcase }
            .reject { |team| EXCLUSIONS&.include?(team) }
  end

  # Returns +part+ as a percentage of +total+; 0.0 when +total+ is zero so that no NaN is printed.
  def percentage(part, total)
    return 0.0 if total.zero?

    (part.to_f / total) * 100
  end

  # Prints one line per hotspot entry; each entry is [path, [teams, commit count]].
  def print_hotspot_changes(top_touched_files)
    puts "\n"
    puts ' Hotspot changes:'
    top_touched_files.each do |path, (teams, commits)|
      puts " #{path.gsub(@directory_path, '')} Contributors: #{teams} Commits: #{commits}"
    end
  end

  # Prints, for the TOP_CONTRIBUTED_TEAMS owners with the highest churn, the
  # multi-team files they own grouped by directory.
  def print_codeowners_report(file_team_map)
    puts "\n"
    puts 'Code ownership data:'
    codeowners = read_codeowners_file
    owners_data = Hash.new do |hash, owner|
      hash[owner] = { directories: Hash.new { |dirs, dir| dirs[dir] = { files: [] } }, churn_count: 0 }
    end

    file_team_map.each do |file, (teams, commits)|
      find_owners(file, codeowners).each do |owner|
        owners_data[owner][:churn_count] += commits
        # Teams are stored with the entry itself so that files sharing a basename cannot be mixed up.
        owners_data[owner][:directories][File.dirname(file)][:files] << { name: File.basename(file), teams:, commits: }
      end
    end

    # Owners are sorted by churn in descending order, so the top ones come first.
    top_owners_data = owners_data.sort_by { |_owner, data| -data[:churn_count] }.first(TOP_CONTRIBUTED_TEAMS.to_i)

    puts ' Codeownership data:'
    top_owners_data.each do |owner, data|
      puts " #{owner.split('/').last}:\n Total Count: #{data[:churn_count]}"
      data[:directories].each do |dir, dir_data|
        puts " Directory: #{dir}\n Top files:"
        dir_data[:files].each do |file_data|
          file_contributors = file_data[:teams].empty? ? ['Excluded contributor'] : file_data[:teams]
          puts " #{file_data[:name]} - #{file_data[:commits]} #{file_contributors}"
        end
      end
    end
  end
end