# frozen_string_literal: true

require 'nokogiri'
require 'active_support/core_ext/enumerable'
require 'rubygems/text'
require 'active_support/core_ext/integer/time'

module GitlabQuality
  module TestTooling
    module Report
      # Relates RSpec test failures to GitLab failure issues:
      # - Uses the API to create or update GitLab issues with the results of tests from RSpec report files.
      # - Takes the JSON test run reports, e.g. `$CI_PROJECT_DIR/gitlab-qa-run-*/**/rspec-*.json`
      # - Takes a project where failure issues should be created
      # - Finds issues by title (with test description or test file), then further filters by stack trace, then picks the best-matching one
      # - Adds the failed job to the issue description, and updates labels
      class RelateFailureIssue < ReportAsIssue
        include Concerns::FindSetDri
        include Concerns::GroupAndCategoryLabels

        # Default for the `max_diff_ratio:` argument of #initialize; stack trace diff ratios are compared against it.
        DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION = 0.15
        # Minimum number of occurrences of an exception message within one report file for it to count as systemic.
        SYSTEMIC_EXCEPTIONS_THRESHOLD = 10
        # Number of similar recent issues at which #create_issue stops opening new issues.
        SPAM_THRESHOLD_FOR_FAILURE_ISSUES = 3
        # Captures, as `stacktrace`, the text following `Failure/Error:` or, failing that, the whole text.
        FAILURE_STACKTRACE_REGEX = %r{(?:(?:.*Failure/Error:(?<stacktrace>.+))|(?<stacktrace>.+))}m
        # Same capture, applied to the fenced code block under the "### Stack trace" heading of an issue description.
        ISSUE_STACKTRACE_REGEX = /### Stack trace\s*(```)#{FAILURE_STACKTRACE_REGEX}(```)/m
        # Matches a job URL and captures `job_url`, `host` and `project_path`.
        JOB_URL_REGEX = %r{(?<job_url>https://(?<host>[\w.]+)/(?<project_path>[\w\-./]+)/-/jobs/\d+)}
        # Legacy issue description format ("First happened in <job url>.").
        FAILED_JOB_DESCRIPTION_REGEX = /First happened in #{JOB_URL_REGEX}\./m
        # A single line of the "### Reports" list, in the shape produced by #report_list_item.
        REPORT_ITEM_REGEX = /^1\. \d{4}-\d{2}-\d{2}: #{JOB_URL_REGEX} \((?<pipeline_url>.+)\)$/
        # NOTE(review): not referenced in this class; presumably consumed by the parent class - confirm.
        NEW_ISSUE_LABELS = Set.new(%w[test failure::new priority::2]).freeze
        # Substrings of exception messages for which failure reporting is skipped (see #should_report?).
        IGNORE_EXCEPTIONS = [
          'Net::ReadTimeout',
          '403 Forbidden - Your account has been blocked'
        ].freeze
        # Substrings of the failure stack trace for which no screenshot section is added (see #screenshot_section).
        SCREENSHOT_IGNORED_ERRORS = ['500 Internal Server Error', 'fabricate_via_api!', 'Error Code 500'].freeze

        # Raised by #find_failure_issue when several issues share the smallest diff ratio.
        MultipleIssuesFound = Class.new(StandardError)

        # Sets up the reporter state used by this class.
        #
        # @param max_diff_ratio [#to_f] threshold the stack trace diff ratios are compared against
        # @param system_logs [String, Array<String>] glob pattern(s), expanded with `Dir.glob`
        # @param base_issue_labels [Enumerable] labels, copied into a new Set
        # @param kwargs [Hash] remaining keyword arguments (handled by the parent initializer)
        def initialize(max_diff_ratio: DEFAULT_MAX_DIFF_RATIO_FOR_DETECTION, system_logs: [], base_issue_labels: Set.new, **kwargs)
          # Bare `super` forwards every argument this method received to the parent initializer.
          super
          @max_diff_ratio = max_diff_ratio.to_f
          @system_logs = Dir.glob(system_logs)
          @base_issue_labels = Set.new(base_issue_labels)
          @issue_type = 'issue'
          # Web URLs of issues already known to carry a report for the current run (see #failure_already_reported?).
          @commented_issue_list = Set.new
        end

        private

        attr_reader :max_diff_ratio, :system_logs, :base_issue_labels

        # Walks every report file, relates each reportable failure to an issue,
        # then writes the test results back.
        def run!
          puts "Reporting test failures in `#{files.join(',')}` as issues in project `#{project}` via the API at `#{Runtime::Env.gitlab_api_base}`."

          TestResults::Builder.new(files).test_results_per_file do |results|
            puts "=> Reporting #{results.count} tests in #{results.path}"

            # Computed once per file, before any individual test is examined.
            systemic = systemic_exceptions_for_test_results(results)

            results.each do |test|
              next unless should_report?(test, systemic)

              relate_failure_to_issue(test)
            end

            results.write
          end
        end

        # Collects the exception messages that occur at least
        # SYSTEMIC_EXCEPTIONS_THRESHOLD times across the given test results.
        #
        # @param test_results [Enumerable] tests responding to `report` (a Hash)
        # @return [Array<String>] the systemic messages, in order of first occurrence
        def systemic_exceptions_for_test_results(test_results)
          messages = test_results.flat_map do |test|
            # Tests without an 'exceptions' entry contribute a nil, dropped below.
            test.report['exceptions']&.map { |exception| exception['message'] }
          end

          occurrences = messages.compact.tally

          occurrences.each_with_object([]) do |(message, count), systemic|
            systemic << message if count >= SYSTEMIC_EXCEPTIONS_THRESHOLD
          end
        end

        # Relates a failed test to an existing issue, or creates a new issue when
        # none is found and the test is not quarantined.
        # An ambiguous match (MultipleIssuesFound) is reported as a warning only.
        def relate_failure_to_issue(test)
          puts " => Relating issues for test '#{test.name}'..."

          existing_issue = find_issue_and_update_reports(test)
          return if existing_issue || test.quarantine?

          create_issue(test)
        rescue MultipleIssuesFound => e
          warn(e.message)
        end

        # Looks up the failure issue matching the test and, unless this failure was
        # already reported on it, adds a report to the issue.
        #
        # @return [Object, nil] the matching issue, or nil when none was found
        def find_issue_and_update_reports(test)
          issue, diff_ratio = find_failure_issue(test)
          return unless issue

          if failure_already_reported?(issue, test)
            puts "  => Failure already reported on issue."
            return issue
          end

          puts "  => Found issue #{issue.web_url} for test '#{test.name}' with a diff ratio of #{(diff_ratio * 100).round(2)}%."
          update_reports(issue, test)
          # Remember the issue so later failures in this run do not add another report to it.
          @commented_issue_list.add(issue.web_url)

          issue
        end

        # Tells whether the issue already carries a report for this failure: either the
        # test's job URL is listed in the issue, or the issue was already recorded
        # in `@commented_issue_list`.
        def failure_already_reported?(issue, test)
          job_listed = failed_issue_job_urls(issue).include?(test.ci_job_url)
          @commented_issue_list << issue.web_url if job_listed

          @commented_issue_list.include?(issue.web_url)
        end

        # Creates a new failure issue through the parent implementation, unless
        # SPAM_THRESHOLD_FOR_FAILURE_ISSUES or more similar issues already exist for the
        # same pipeline environment; in that case a report is added to each of them.
        def create_issue(test)
          similar_issues = pipeline_issues_with_similar_stacktrace(test)
          return super if similar_issues.size < SPAM_THRESHOLD_FOR_FAILURE_ISSUES

          puts "  => Similar failure issues have already been opened for the same pipeline environment, we won't create new issue"
          similar_issues.each do |existing_issue|
            puts "  => Please check issue: #{existing_issue.web_url}"
            update_reports(existing_issue, test)
          end

          nil
        end

        # Finds recently opened failure issues that belong to the same pipeline
        # environment and whose stack trace is close to the one of the given test.
        #
        # Issues without a detectable stack trace are skipped rather than compared.
        #
        # @param test [Object] the failed test
        # @return [Array] the issues whose diff ratio is strictly below `max_diff_ratio`
        def pipeline_issues_with_similar_stacktrace(test)
          search_labels = (base_issue_labels + Set.new(%w[test failure::new])).to_a
          recent_issues = gitlab.find_issues(
            options: { state: 'opened', labels: search_labels, created_after: past_timestamp(2) }
          )

          # The test's stack trace does not depend on the issue: compute it at most once, on first use.
          stack_trace_from_test = nil

          recent_issues.select do |issue|
            next false if pipeline != pipeline_env_from_job_url(failed_issue_job_url(issue))

            stack_trace_from_issue = cleaned_stack_trace_from_issue(issue)
            # No stack trace section in the issue: there is nothing to compare against.
            next false if stack_trace_from_issue.nil?

            stack_trace_from_test ||= cleaned_stacktrace_from_test(test)
            compare_stack_traces(stack_trace_from_test, stack_trace_from_issue) < max_diff_ratio
          end
        end

        # Returns the last job URL listed in the issue description, trying the
        # current report format first and the legacy format second.
        #
        # @return [String, nil]
        def failed_issue_job_url(issue)
          [REPORT_ITEM_REGEX, FAILED_JOB_DESCRIPTION_REGEX].each do |format_regex|
            latest_url = job_urls_from_description(issue.description, format_regex).last
            return latest_url if latest_url
          end

          nil
        end

        # Returns every job URL listed in the issue description: those in the
        # current report format first, followed by those in the legacy format.
        #
        # @return [Array<String>]
        def failed_issue_job_urls(issue)
          [REPORT_ITEM_REGEX, FAILED_JOB_DESCRIPTION_REGEX].flat_map do |format_regex|
            job_urls_from_description(issue.description, format_regex)
          end
        end

        # Extracts the `job_url` capture from every line of the description that
        # matches the given regex.
        #
        # @param issue_description [String, nil] the issue description; nil is treated as empty
        # @param regex [Regexp] a regex with a `job_url` named capture
        # @return [Array<String>] the job URLs, in order of appearance
        def job_urls_from_description(issue_description, regex)
          # `to_s` turns a nil description into an empty one instead of raising.
          issue_description.to_s.each_line.filter_map { |line| line[regex, 'job_url'] }
        end

        # Derives the pipeline environment from a job URL: the path segment that
        # follows '/quality/', or the default branch when the URL has no such part.
        #
        # @param job_url [String, nil]
        # @return [String, nil] nil when no URL is given
        def pipeline_env_from_job_url(job_url)
          return if job_url.nil?

          _prefix, marker, remainder = job_url.partition('/quality/')
          # An empty marker means '/quality/' does not occur in the URL.
          return Runtime::Env.default_branch if marker.empty?

          remainder.partition('/').first
        end

        # Formats the moment `hours_ago` hours before now as a UTC timestamp
        # of the form `YYYY-MM-DDTHH:MM:SSZ`.
        def past_timestamp(hours_ago)
          seconds_ago = hours_ago * 60 * 60

          (Time.now - seconds_ago).utc.strftime("%Y-%m-%dT%H:%M:%SZ")
        end

        # Fetches the opened issues labelled with the base labels plus `test`, keeping
        # those whose title mentions the test name or the partial path of the test file.
        def failure_issues(test)
          labels = (base_issue_labels | %w[test]).to_a
          opened_issues = gitlab.find_issues(options: { state: 'opened', labels: labels })

          opened_issues.select do |issue|
            title = issue.title.strip
            next true if title.include?(test.name)

            title.include?(partial_file_path(test.file))
          end
        end

        # Returns the full stack trace of the first failure of the test.
        #
        # The `message_lines` entry is joined with newlines when it is a non-empty
        # collection of lines; the plain `message` is used when `message_lines` is
        # missing, empty, or a String.
        #
        # @param test [Object] a test responding to `failures` (a list of Hashes)
        # @return [String, nil]
        def full_stacktrace(test)
          failure = test.failures.first
          message_lines = failure['message_lines']

          return failure['message'] if message_lines.nil? || message_lines.is_a?(String) || message_lines.empty?

          message_lines.join("\n")
        end

        # Extracts the stack trace from the issue and replaces unique resource names in it.
        #
        # @return [String, nil] nil when the issue has no detectable stack trace
        def cleaned_stack_trace_from_issue(issue)
          issue_stacktrace = find_issue_stacktrace(issue)

          remove_unique_resource_names(issue_stacktrace) if issue_stacktrace
        end

        # Builds the comparable stack trace of the test's first failure: the sanitized
        # trace when the failure regex matches, the raw trace otherwise, with unique
        # resource names replaced in both cases.
        def cleaned_stacktrace_from_test(test)
          raw_stacktrace = full_stacktrace(test)
          sanitized = sanitize_stacktrace(raw_stacktrace, FAILURE_STACKTRACE_REGEX)

          remove_unique_resource_names(sanitized || raw_stacktrace)
        end

        # Compares two stack traces by delegating to #calculate_diff_ratio.
        #
        # @return the value returned by #calculate_diff_ratio for the two traces
        def compare_stack_traces(stack_trace_first, stack_trace_second)
          calculate_diff_ratio(stack_trace_first, stack_trace_second)
        end

        # Computes how different two stack traces are: the Levenshtein distance between
        # them divided by the length of the first one, rounded to 3 decimals.
        #
        # @param stack_trace_first [String] the reference stack trace (its size is the divisor)
        # @param stack_trace_second [String] the stack trace compared against it
        # @return [Float] 0.0 for identical traces; a non-zero distance over an empty
        #   first trace yields Float::INFINITY
        def calculate_diff_ratio(stack_trace_first, stack_trace_second)
          # The helper object does not depend on the inputs: build it once and reuse it across comparisons.
          @levenshtein_distance ||= Object.new.extend(Gem::Text).method(:levenshtein_distance)

          distance = @levenshtein_distance.call(stack_trace_first, stack_trace_second)
          return 0.0 if distance.zero?

          (distance.to_f / stack_trace_first.size).round(3)
        end

        # Maps each candidate failure issue (as a Hash) to its stack trace diff ratio,
        # keeping only the issues whose ratio does not exceed `max_diff_ratio`.
        # Issues without a detectable stack trace are skipped.
        #
        # @return [Hash{Hash => Float}]
        def find_relevant_failure_issues(test) # rubocop:disable Metrics/AbcSize
          failure_stacktrace = cleaned_stacktrace_from_test(test)

          # Search with the `search` param returns 500 errors, so we filter by `base_issue_labels` and then filter further in Ruby
          failure_issues(test).each_with_object({}) do |issue, matches|
            issue_stacktrace = cleaned_stack_trace_from_issue(issue)
            next if issue_stacktrace.nil?

            diff_ratio = compare_stack_traces(failure_stacktrace, issue_stacktrace)
            percentage = (diff_ratio * 100).round(2)

            unless diff_ratio <= max_diff_ratio
              puts "  => [DEBUG] Found issue #{issue.web_url} but stacktraces are too different (#{percentage}%).\n"
              puts "  => [DEBUG] Issue stacktrace:\n----------------\n#{issue_stacktrace}\n----------------\n"
              puts "  => [DEBUG] Failure stacktrace:\n----------------\n#{failure_stacktrace}\n----------------\n"
              next
            end

            puts "  => [DEBUG] Issue #{issue.web_url} has an acceptable diff ratio of #{percentage}%."
            # The `Gitlab::ObjectifiedHash` class overrides `#hash` which is used by `Hash#[]=` to compute the hash key.
            # This leads to a `TypeError Exception: no implicit conversion of Hash into Integer` error, so we convert the object to a hash before using it as a Hash key.
            # See:
            # - https://gitlab.com/gitlab-org/gitlab-qa/-/merge_requests/587#note_453336995
            # - https://github.com/NARKOZ/gitlab/commit/cbdbd1e32623f018a8fae39932a8e3bc4d929abb?_pjax=%23js-repo-pjax-container#r44484494
            matches[issue.to_h] = diff_ratio
          end
        end

        # Extracts the sanitized stack trace from the issue description.
        # Prints a debug line and returns nil when no stack trace is found.
        def find_issue_stacktrace(issue)
          stacktrace = sanitize_stacktrace(issue.description, ISSUE_STACKTRACE_REGEX)

          if stacktrace
            stacktrace
          else
            puts "  => [DEBUG] Stacktrace couldn't be found for #{issue.web_url}!"
          end
        end

        # Extracts the `stacktrace` capture of the regex from the given text and
        # normalizes it: lines starting with `#` are blanked, leading whitespace is
        # removed from every line, and the result is stripped.
        #
        # @param stacktrace [String, nil] the text to search; nil is treated as empty text
        # @param regex [Regexp] a regex with a `stacktrace` named capture
        # @return [String, nil] nil (after a debug line) when the regex does not match
        def sanitize_stacktrace(stacktrace, regex)
          # `to_s` lets a missing text fall into the "no match" branch instead of raising.
          stacktrace_match = stacktrace.to_s.match(regex)

          if stacktrace_match
            stacktrace_match[:stacktrace].gsub(/^\s*#.*$/, '').gsub(/^[[:space:]]+/, '').strip
          else
            puts "  => [DEBUG] Stacktrace doesn't match the regex (#{regex}):\n----------------\n#{stacktrace}\n----------------\n"
          end
        end

        # Replaces generated resource names and hash-like suffixes with fixed
        # placeholders, so that they do not count as differences between stack traces.
        def remove_unique_resource_names(stacktrace)
          without_resources = stacktrace.gsub(/(QA User |qa-(test|user)-)[a-z0-9-]+/, '<unique-test-resource>')

          without_resources.gsub(/(?:-|_)(?:\d+[a-z]|[a-z]+\d)[a-z\d]{4,}/, '<unique-hash>')
        end

        # Picks the relevant failure issue with the smallest diff ratio and records its
        # URL on the test unless the test already has a failure issue.
        #
        # @return [Array, nil] the issue and its diff ratio, or nil when nothing is relevant
        # @raise [MultipleIssuesFound] when several issues share the smallest diff ratio
        def find_failure_issue(test)
          candidates = find_relevant_failure_issues(test)
          return if candidates.empty?

          closest_issue_hash, lowest_diff_ratio = candidates.min_by { |_issue, ratio| ratio }
          ties = candidates.values.count(lowest_diff_ratio)

          raise(MultipleIssuesFound, %(Too many issues found for test '#{test.name}' (`#{test.file}`)!)) if ties != 1

          # Re-instantiate a `Gitlab::ObjectifiedHash` object after having converted it to a hash in #find_relevant_failure_issues above.
          closest_issue = Gitlab::ObjectifiedHash.new(closest_issue_hash)
          test.failure_issue ||= closest_issue.web_url

          [closest_issue, lowest_diff_ratio]
        end

        # Extends the parent's issue description with the stack trace, the optional
        # screenshot and system log sections, and the reports section.
        def new_issue_description(test)
          base_description = super

          extra_sections = [
            "\n### Stack trace",
            "```\n#{full_stacktrace(test)}\n```",
            screenshot_section(test),
            system_log_errors_section(test),
            reports_section(test)
          ]

          # Sections that returned nil are left out entirely.
          base_description + extra_sections.compact.join("\n\n")
        end

        # Builds the system logs section for the first failure of the test.
        #
        # @return [String, nil] the markdown summary, or nil (after a notice) when there are
        #   no system logs, no correlation id, or the summary is empty
        def system_log_errors_section(test)
          correlation_id = test.failures.first['correlation_id']

          section =
            if system_logs.any? && !correlation_id.nil?
              SystemLogs::SystemLogsFormatter.new(system_logs, correlation_id).system_logs_summary_markdown
            else
              ''
            end

          return section unless section.empty?

          puts "  => No system logs or correlation id provided, skipping this section in issue description"
          nil
        end

        # Builds the initial "Reports" section of a new issue, holding a single report item.
        def reports_section(test)
          ['### Reports (1)', '', report_list_item(test), ''].join("\n")
        end

        # Formats one report list item: today's UTC date, the job URL of the test and
        # the pipeline URL taken from `CI_PIPELINE_URL` (with a placeholder when unset).
        def report_list_item(test)
          today = Time.now.utc.strftime('%F')
          job_url = test.ci_job_url
          pipeline_url = ENV.fetch('CI_PIPELINE_URL', 'pipeline url is missing')

          "1. #{today}: #{job_url} (#{pipeline_url})"
        end

        # Combines the base issue labels with the labels computed by the parent
        # implementation, to which the pipeline name label is appended.
        #
        # @return [Array] the combined labels, without duplicates
        def up_to_date_labels(test:, issue: nil, new_labels: Set.new)
          base_labels = Set.new(base_issue_labels)
          inherited_labels = super
          inherited_labels << pipeline_name_label

          (base_labels + inherited_labels).to_a
        end

        # Resolves the user id of the DRI for the test's product group.
        #
        # @return [Object, nil] the value returned by `gitlab.find_user_id`, or nil when
        #   the test has no product group
        def new_issue_assignee_id(test)
          if test.product_group?
            username = set_dri_via_group(test.product_group, test)
            puts "  => Assigning #{username} as DRI for the issue."

            gitlab.find_user_id(username: username)
          end
        end

        # Due date for a new issue: one month from today when the test has a
        # product group, nil otherwise.
        def new_issue_due_date(test)
          Date.today + 1.month if test.product_group?
        end

        # Adds a report for the test to the issue by updating the issue's
        # description and labels through the API.
        def update_reports(issue, test)
          changes = {
            description: up_to_date_issue_description(issue.description, test),
            labels: up_to_date_labels(test: test, issue: issue)
          }

          gitlab.edit_issue(iid: issue.iid, options: changes)
          puts "  => Added a report in '#{issue.title}': #{issue.web_url}!"
        end

        # Returns the issue description with the reports counter bumped in the
        # "### Reports" header and a new report item appended at the end.
        # Descriptions in the legacy format (no "### Reports" header) get the header added,
        # counting the job URLs already present.
        def up_to_date_issue_description(issue_description, test)
          has_reports_section = issue_description.include?('### Reports')
          counted_pattern = has_reports_section ? REPORT_ITEM_REGEX : JOB_URL_REGEX

          # We include the number of reports in the header, for visibility.
          reports_count = issue_description.scan(counted_pattern).size + 1
          reports_header = "### Reports (#{reports_count})"

          description_with_header =
            if has_reports_section
              issue_description.sub(/^### Reports.*$/, reports_header)
            else
              "#{issue_description}\n\n#{reports_header}"
            end

          "#{description_with_header}\n#{report_list_item(test)}"
        end

        # Prefixes the parent's issue title with "Failure in ".
        def new_issue_title(test)
          parent_title = super

          "Failure in #{parent_title}"
        end

        # Uploads the failure screenshot and returns the markdown section embedding it.
        #
        # @return [String, nil] nil when the test has no screenshot, when the failure contains
        #   one of SCREENSHOT_IGNORED_ERRORS, or when the upload returns nothing
        def screenshot_section(test)
          return unless test.screenshot?

          stacktrace = full_stacktrace(test)
          ignorable = SCREENSHOT_IGNORED_ERRORS.any? { |ignored_error| stacktrace.include?(ignored_error) }
          return if ignorable

          upload = gitlab.upload_file(file_fullpath: test.failure_screenshot)

          "### Screenshot\n\n#{upload.markdown}" if upload
        end

        # Decides whether a test failure deserves to be reported.
        #
        # @param test [Object] the test, responding to `failures` and `report`
        # @param systemic_exceptions [Array<String>] exception messages considered systemic
        # @return [TrueClass|FalseClass] false if the test has no failures, or if it failed only
        # because of errors that can be ignored. Otherwise returns true.
        def should_report?(test, systemic_exceptions)
          return false if test.failures.empty?

          puts "  => Systemic exceptions detected: #{systemic_exceptions}" if systemic_exceptions.any?
          ignorable_exceptions = IGNORE_EXCEPTIONS + systemic_exceptions

          return true unless test.report.key?('exceptions')

          skip_reason = ignore_failure_reason(test.report['exceptions'], ignorable_exceptions)
          return true unless skip_reason

          puts "  => Failure reporting skipped because #{skip_reason}"
          false
        end

        # Determine any reason to ignore a failure.
        #
        # A failure is ignored only when every one of its exceptions has a message containing
        # one of the ignored exceptions. An exception without a message is never ignored.
        #
        # @param [Array<Hash>] exceptions the exceptions associated with the failure.
        # @param [Array<String>] ignored_exceptions substrings identifying exceptions that can be ignored.
        # @return [String] the reason to ignore the exceptions, or `nil` if any exceptions should not be ignored.
        def ignore_failure_reason(exceptions, ignored_exceptions)
          exception_messages = exceptions.filter_map do |exception|
            message = exception['message']
            # A missing message cannot match anything, so it counts as "not ignored".
            message if message && ignored_exceptions.any? { |ignored| message.include?(ignored) }
          end
          return if exception_messages.empty? || exception_messages.size < exceptions.size

          msg = exception_messages.size > 1 ? 'the errors were' : 'the error was'
          "#{msg} #{exception_messages.join(', ')}"
        end
      end
    end
  end
end