# frozen_string_literal: true

require 'nokogiri'
require 'active_support/core_ext/enumerable'
require 'rubygems/text'
require 'active_support/core_ext/integer/time'

module Gitlab
  module QA
    module Report
      # Uses the API to create or update GitLab issues with the results of tests from RSpec report files.
      #
      # For every reportable failed test, the reporter looks for an already opened issue whose recorded
      # stack trace is close enough (Levenshtein-based diff ratio) to the test failure. When one is found
      # a "Failed most recently in ..." note is posted/updated on it, otherwise a new issue is created.
      class RelateFailureIssue < ReportAsIssue
        include FindSetDri

        # Maximum diff ratio (0.0..1.0) under which two stack traces are considered the same failure.
        DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION = 0.15
        # Number of similar, recently opened issues from which no new issue is created anymore.
        SPAM_THRESHOLD_FOR_FAILURE_ISSUES = 3
        # Both alternatives expose their capture as the `stacktrace` named group,
        # which is the group #sanitize_stacktrace reads from the match.
        FAILURE_STACKTRACE_REGEX = %r{((.*Failure\/Error:(?<stacktrace>.+))|(?<stacktrace>.+))}m.freeze
        ISSUE_STACKTRACE_REGEX = /### Stack trace\s*(```)#{FAILURE_STACKTRACE_REGEX}(```)/m.freeze
        FAILED_JOB_DESCRIPTION_REGEX = %r{First happened in https?:\/\/\S+\.}m.freeze
        FAILED_JOB_NOTE_REGEX = %r{Failed most recently in \D+ pipeline: https?:\/\/\S+}.freeze
        NEW_ISSUE_LABELS = Set.new(%w[QA Quality test failure::new priority::2]).freeze
        # Failures whose exceptions all contain one of these messages are not reported.
        IGNORE_EXCEPTIONS = ['Net::ReadTimeout', '403 Forbidden - Your account has been blocked'].freeze

        MultipleIssuesFound = Class.new(StandardError)

        # @param system_logs [String, Array<String>] glob pattern(s) of system log files, expanded with `Dir.glob`.
        # @param max_diff_ratio [#to_f] maximum stack trace diff ratio for an issue to be considered a match.
        # @param kwargs [Hash] remaining options, forwarded (together with the ones above) to the parent class.
        def initialize(system_logs: [], max_diff_ratio: DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION, **kwargs)
          super
          @system_logs = Dir.glob(system_logs)
          @max_diff_ratio = max_diff_ratio.to_f
          @issue_type = 'issue'
          # Web URLs of the issues that already received a failure note during this run.
          @commented_issue_list = Set.new
        end

        private

        attr_reader :max_diff_ratio

        # Entry point: walks every test result file and relates each reportable failure to an issue,
        # then writes the (possibly updated) test results back.
        def run!
          puts "Reporting test failures in `#{files.join(',')}` as issues in project `#{project}` via the API at `#{Runtime::Env.gitlab_api_base}`."

          test_results_per_file do |test_results|
            puts "=> Reporting tests in #{test_results.path}"

            test_results.each do |test|
              relate_failure_to_issue(test) if should_report?(test)
            end

            test_results.write
          end
        end

        # Links the failed test to an existing issue, or creates a new issue when none matches
        # and the test is not quarantined. Ambiguous matches are only warned about.
        #
        # @param test [Object] the failed test result.
        def relate_failure_to_issue(test)
          puts " => Searching issues for test '#{test.name}'..."

          begin
            issue, issue_already_commented = find_and_link_issue(test)
            return create_issue(test) unless issue || test.quarantine?

            update_labels(issue, test) unless issue_already_commented
          rescue MultipleIssuesFound => e
            warn(e.message)
          end
        end

        # Finds the best matching issue and posts/updates the failed job note on it, once per run.
        #
        # @param test [Object] the failed test result.
        # @return [Array] `[issue, issue_already_commented]`; `[false, true]` when no issue matches, so that
        #   the caller neither updates labels nor mistakes the value for an issue.
        def find_and_link_issue(test)
          issue, diff_ratio = find_failure_issue(test)
          return [false, true] unless issue

          issue_already_commented = issue_already_commented?(issue)

          if issue_already_commented
            puts " => Failure already commented on issue."
          else
            puts " => Found issue #{issue.web_url} for test '#{test.name}' with a diff ratio of #{(diff_ratio * 100).round(2)}%."
            post_or_update_failed_job_note(issue, test)
            @commented_issue_list.add(issue.web_url)
          end

          [issue, issue_already_commented]
        end

        # Creates a new failure issue, unless enough similar issues were recently opened for the same
        # pipeline, in which case the failed job is only mentioned on those issues.
        #
        # @param test [Object] the failed test result.
        # @return [Object, nil] the created issue, or `nil` when creation was skipped.
        def create_issue(test)
          similar_issues = pipeline_issues_with_similar_stacktrace(test)

          if similar_issues.size >= SPAM_THRESHOLD_FOR_FAILURE_ISSUES
            puts " => Similar failure issues have already been opened for same pipeline environment"
            puts " => Will not create new issue for this failing spec"
            similar_issues.each do |similar_issue|
              puts "Please check issue: #{similar_issue.web_url}"
              gitlab.create_issue_note(iid: similar_issue.iid, note: "This failed job is most likely related: #{test.ci_job_url}")
            end
            return
          end

          issue = super
          puts "for test '#{test.name}'."

          post_or_update_failed_job_note(issue, test)
          assign_dri(issue, test)

          issue
        end

        # Selects the `failure::new` QA issues opened in the last two hours that belong to the current
        # pipeline and whose stack trace is strictly below the maximum diff ratio.
        #
        # @param test [Object] the failed test result.
        # @return [Array] the similar issues.
        def pipeline_issues_with_similar_stacktrace(test)
          recent_issues = gitlab.find_issues(
            options: { state: 'opened', labels: 'QA,failure::new', created_after: past_timestamp(2) }
          )
          # Loop-invariant, so computed at most once, and only if some issue reaches the comparison.
          stack_trace_from_test = nil

          recent_issues.select do |issue|
            job_url_from_issue = failed_issue_job_url(issue)
            next unless pipeline == pipeline_env_from_job_url(job_url_from_issue)

            stack_trace_from_issue = cleaned_stack_trace_from_issue(issue)
            # An issue without a recognizable stack trace cannot be compared (same guard as in
            # #find_relevant_failure_issues); comparing against `nil` would raise.
            next if stack_trace_from_issue.nil?

            stack_trace_from_test ||= cleaned_stacktrace_from_test(test)
            compare_stack_traces(stack_trace_from_test, stack_trace_from_issue) < max_diff_ratio
          end
        end

        # Extracts the failed job URL from the issue's failure note, or from its description
        # when the issue has no such note.
        #
        # @param issue [Object] the issue to inspect.
        # @return [String, nil] the job URL, or `nil` when none could be found.
        def failed_issue_job_url(issue)
          existing_note = existing_failure_note(issue)

          matched =
            if existing_note
              existing_note.body.match(FAILED_JOB_NOTE_REGEX)
            else
              issue.description.match(FAILED_JOB_DESCRIPTION_REGEX)
            end
          return unless matched

          # Only the description match ends with a period; drop it without truncating
          # the URL when the match comes from the note.
          job_url = matched[0].chomp('.').split(' ').last
          puts "=> Found failed job url in the issue: #{job_url}"
          job_url
        end

        # @param job_url [String, nil] a CI job URL.
        # @return [String, nil] the path segment following `/quality/`, `'master'` when the URL has
        #   no such segment, or `nil` when no URL is given.
        def pipeline_env_from_job_url(job_url)
          return if job_url.nil?
          return 'master' unless job_url.include?('/quality/')

          job_url.partition('/quality/').last.partition('/').first
        end

        # @param hours_ago [Numeric] how many hours to go back from now.
        # @return [String] the UTC timestamp formatted as `YYYY-MM-DDTHH:MM:SSZ`.
        def past_timestamp(hours_ago)
          timestamp = Time.now - (hours_ago * 60 * 60)
          timestamp.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
        end

        # @param test [Object] the failed test result.
        # @return [Array] opened QA issues whose title mentions the test name or its partial file path.
        def failure_issues(test)
          gitlab.find_issues(options: { state: 'opened', labels: 'QA' }).select do |issue|
            issue_title = issue.title.strip
            issue_title.include?(test.name) || issue_title.include?(partial_file_path(test.file))
          end
        end

        # @param test [Object] the failed test result.
        # @return [String] the first failure's `message` when its `message_lines` is empty or a String,
        #   otherwise its `message_lines` joined with newlines.
        def full_stacktrace(test)
          first_failure = test.failures.first
          message_lines = first_failure['message_lines']

          if message_lines.empty? || message_lines.instance_of?(String)
            first_failure['message']
          else
            message_lines.join("\n")
          end
        end

        # @param issue [Object] the issue to inspect.
        # @return [String, nil] the issue's normalized stack trace, or `nil` when it has none.
        def cleaned_stack_trace_from_issue(issue)
          relevant_issue_stacktrace = find_issue_stacktrace(issue)
          return unless relevant_issue_stacktrace

          remove_unique_resource_names(relevant_issue_stacktrace)
        end

        # @param test [Object] the failed test result.
        # @return [String] the test's normalized stack trace; falls back to the raw stack trace when
        #   it cannot be sanitized.
        def cleaned_stacktrace_from_test(test)
          stacktrace = full_stacktrace(test)
          first_test_failure_stacktrace = sanitize_stacktrace(stacktrace, FAILURE_STACKTRACE_REGEX) || stacktrace

          remove_unique_resource_names(first_test_failure_stacktrace)
        end

        # @return [Float] the diff ratio between both stack traces (see #calculate_diff_ratio).
        def compare_stack_traces(stack_trace_first, stack_trace_second)
          calculate_diff_ratio(stack_trace_first, stack_trace_second)
        end

        # @param stack_trace_first [String] the reference stack trace (its size is the denominator).
        # @param stack_trace_second [String] the stack trace compared against the reference.
        # @return [Float] `0.0` for identical traces, otherwise the Levenshtein distance divided by the
        #   size of the first trace, rounded to 3 decimals.
        def calculate_diff_ratio(stack_trace_first, stack_trace_second)
          # `Gem::Text` only provides instance methods, hence the throwaway receiver.
          ld = Class.new.extend(Gem::Text).method(:levenshtein_distance)
          distance = ld.call(stack_trace_first, stack_trace_second)
          distance.zero? ? 0.0 : (distance.to_f / stack_trace_first.size).round(3)
        end

        # Collects the candidate issues whose stack trace is within the maximum diff ratio.
        #
        # @param test [Object] the failed test result.
        # @return [Hash{Hash => Float}] issues (converted to plain hashes) mapped to their diff ratio.
        def find_relevant_failure_issues(test) # rubocop:disable Metrics/AbcSize
          clean_first_test_failure_stacktrace = cleaned_stacktrace_from_test(test)

          # Search with the `search` param returns 500 errors, so we filter by ~QA and then filter further in Ruby
          failure_issues(test).each_with_object({}) do |issue, memo|
            clean_relevant_issue_stacktrace = cleaned_stack_trace_from_issue(issue)
            next if clean_relevant_issue_stacktrace.nil?

            diff_ratio = compare_stack_traces(clean_first_test_failure_stacktrace, clean_relevant_issue_stacktrace)

            if diff_ratio <= max_diff_ratio
              puts " => [DEBUG] Issue #{issue.web_url} has an acceptable diff ratio of #{(diff_ratio * 100).round(2)}%."
              # The `Gitlab::ObjectifiedHash` class overrides `#hash` which is used by `Hash#[]=` to compute the hash key.
              # This leads to a `TypeError Exception: no implicit conversion of Hash into Integer` error, so we convert
              # the object to a hash before using it as a Hash key.
              # See:
              # - https://gitlab.com/gitlab-org/gitlab-qa/-/merge_requests/587#note_453336995
              # - https://github.com/NARKOZ/gitlab/commit/cbdbd1e32623f018a8fae39932a8e3bc4d929abb?_pjax=%23js-repo-pjax-container#r44484494
              memo[issue.to_h] = diff_ratio
            else
              puts " => [DEBUG] Found issue #{issue.web_url} but stacktraces are too different (#{(diff_ratio * 100).round(2)}%).\n"
              puts " => [DEBUG] Issue stacktrace:\n----------------\n#{clean_relevant_issue_stacktrace}\n----------------\n"
              puts " => [DEBUG] Failure stacktrace:\n----------------\n#{clean_first_test_failure_stacktrace}\n----------------\n"
            end
          end
        end

        # @param issue [Object] the issue to inspect.
        # @return [String, nil] the sanitized stack trace found in the issue description, or `nil`.
        def find_issue_stacktrace(issue)
          issue_stacktrace = sanitize_stacktrace(issue.description, ISSUE_STACKTRACE_REGEX)
          return issue_stacktrace if issue_stacktrace

          puts " => [DEBUG] Stacktrace couldn't be found for #{issue.web_url}!"
          nil
        end

        # Extracts the `stacktrace` named group of `regex` and strips what is not part of the trace:
        # anything from 'First happened in' on, lines starting with `#`, and leading whitespace.
        #
        # @param stacktrace [String] the raw text to search.
        # @param regex [Regexp] a regex exposing a `stacktrace` named group.
        # @return [String, nil] the sanitized stack trace, or `nil` when `regex` does not match.
        def sanitize_stacktrace(stacktrace, regex)
          stacktrace_match = stacktrace.match(regex)

          unless stacktrace_match
            puts " => [DEBUG] Stacktrace doesn't match the expected regex (#{regex}):\n----------------\n#{stacktrace}\n----------------\n"
            return
          end

          stacktrace_match[:stacktrace]
            .split('First happened in')[0]
            .gsub(/^\s*#.*$/, '')
            .gsub(/^[[:space:]]+/, '')
            .strip
        end

        # Removes the run-specific parts of a stack trace (`qa-test-*`/`qa-user-*` names and
        # `-`/`_` prefixed alphanumeric suffixes) so that they do not count as differences.
        #
        # @param stacktrace [String]
        # @return [String]
        def remove_unique_resource_names(stacktrace)
          stacktrace
            .gsub(/qa-(test|user)-[a-z0-9-]+/, '')
            .gsub(/(?:-|_)(?:\d+[a-z]|[a-z]+\d)[a-z\d]{4,}/, '')
        end

        # Picks the issue with the smallest diff ratio and records it on the test.
        #
        # @param test [Object] the failed test result.
        # @return [Array, nil] `[issue, diff_ratio]`, or `nil` when no issue is relevant.
        # @raise [MultipleIssuesFound] when several issues share the smallest diff ratio.
        def find_failure_issue(test)
          relevant_issues = find_relevant_failure_issues(test)

          return nil if relevant_issues.empty?

          best_matching_issue, smaller_diff_ratio = relevant_issues.min_by { |_, diff_ratio| diff_ratio }

          unless relevant_issues.values.count(smaller_diff_ratio) == 1 # rubocop:disable Style/IfUnlessModifier
            raise(MultipleIssuesFound, %(Too many issues found for test '#{test.name}' (`#{test.file}`)!))
          end

          # Re-instantiate a `Gitlab::ObjectifiedHash` object after having converted it to a hash in
          # #find_relevant_failure_issues above.
          best_matching_issue = Gitlab::ObjectifiedHash.new(best_matching_issue)

          test.failure_issue ||= best_matching_issue.web_url

          [best_matching_issue, smaller_diff_ratio]
        end

        # @param test [Object] the failed test result.
        # @return [String] the parent's description followed by the stack trace, job link, test case
        #   link, screenshot and system logs sections.
        def new_issue_description(test)
          super + [
            "\n\n### Stack trace",
            "```\n#{full_stacktrace(test)}\n```",
            "First happened in #{test.ci_job_url}.",
            "Related test case: #{test.testcase}.",
            screenshot_section(test),
            system_log_errors_section(test)
          ].join("\n\n")
        end

        # @param test [Object] the failed test result.
        # @return [String] the system logs summary in markdown, or `''` when there are no system logs
        #   or the first failure has no correlation id.
        def system_log_errors_section(test)
          correlation_id = test.failures.first['correlation_id']
          section = ''

          if @system_logs.any? && !correlation_id.nil?
            section = SystemLogs::SystemLogsFormatter.new(
              @system_logs,
              correlation_id
            ).system_logs_summary_markdown
          end

          puts " => No system logs or correlation id provided, skipping this section in issue description" if section.empty?

          section
        end

        # @return [Object] the labels for a newly created issue.
        def new_issue_labels(test)
          up_to_date_labels(test: test, new_labels: NEW_ISSUE_LABELS)
        end

        # @return [Object] the parent's labels with the pipeline name label appended.
        def up_to_date_labels(test:, issue: nil, new_labels: Set.new)
          super << pipeline_name_label
        end

        # Makes sure the issue carries a single, current "Failed most recently in ..." note.
        # Nothing is done (nor printed) when the existing note is already up to date.
        #
        # @param issue [Object] the issue to comment on.
        # @param test [Object] the failed test result.
        def post_or_update_failed_job_note(issue, test)
          current_note = "Failed most recently in #{pipeline} pipeline: #{test.ci_job_url}"
          existing_note = existing_failure_note(issue)

          return if existing_note && current_note == existing_note.body

          if existing_note
            gitlab.edit_issue_note(issue_iid: issue.iid, note_id: existing_note.id, note: current_note)
          else
            gitlab.create_issue_note(iid: issue.iid, note: current_note)
          end

          puts " => Linked #{test.ci_job_url} to #{issue.web_url}."
        end

        # @return [String] the parent's title prefixed with "Failure in ".
        def new_issue_title(test)
          "Failure in #{super}"
        end

        # @param issue [Object] the issue to inspect.
        # @return [Object, nil] the first note containing 'Failed most recently in', if any.
        def existing_failure_note(issue)
          gitlab.find_issue_notes(iid: issue.iid)&.find do |note|
            note.body.include?('Failed most recently in')
          end
        end

        # Uploads the failure screenshot, except for failures mentioning a 500 error or
        # `fabricate_via_api!`.
        #
        # @param test [Object] the failed test result.
        # @return [String] the markdown screenshot section, or `''` when no screenshot is attached.
        def screenshot_section(test)
          section = ''
          failure = full_stacktrace(test)
          skipped_markers = ['500 Internal Server Error', 'fabricate_via_api!', 'Error Code 500']

          if test.screenshot? && skipped_markers.none? { |e| failure.include?(e) }
            relative_url = gitlab.upload_file(file_fullpath: test.failure_screenshot)
            section = "### Screenshot: #{relative_url.markdown}" if relative_url
          end

          section
        end

        # Assigns the issue to the DRI of the test's product group, due in one month.
        #
        # @param issue [Object] the issue to assign.
        # @param test [Object] the failed test result.
        def assign_dri(issue, test)
          if test.product_group?
            dri = set_dri_via_group(test.product_group, test)
            dri_id = gitlab.find_user_id(username: dri)
            gitlab.edit_issue(iid: issue.iid, options: { assignee_id: dri_id, due_date: Date.today + 1.month })
            puts " => Assigning #{dri} as DRI for the issue."
          else
            puts " => No product group metadata found for test '#{test.name}'"
          end
        end

        # Checks if a test failure should be reported.
        #
        # @return [Boolean] false if the test has no failures or failed because of a transient error
        #   that can be ignored. Otherwise returns true.
        def should_report?(test)
          return false if test.failures.empty?

          if test.report.key?('exceptions')
            reason = ignore_failure_reason(test.report['exceptions'])

            if reason
              puts "Failure reporting skipped because #{reason}"

              return false
            end
          end

          true
        end

        # Determine any reason to ignore a failure.
        #
        # @param [Array<Hash>] exceptions the exceptions associated with the failure.
        # @return [String, nil] the reason to ignore the failure when every exception is ignorable,
        #   or `nil` if at least one exception should not be ignored.
        def ignore_failure_reason(exceptions)
          exception_messages = exceptions.filter_map do |exception|
            exception['message'] if IGNORE_EXCEPTIONS.any? { |e| exception['message'].include?(e) }
          end

          return if exception_messages.empty? || exception_messages.size < exceptions.size

          msg = exception_messages.many? ? 'the errors were' : 'the error was'
          "#{msg} #{exception_messages.join(', ')}"
        end

        # @return [Boolean] whether a failure note was already posted on the issue during this run.
        def issue_already_commented?(issue)
          @commented_issue_list.include?(issue.web_url)
        end
      end
    end
  end
end