# Copyright (c) 2023 Contrast Security, Inc. See https://www.contrastsecurity.com/enduser-terms-0317a for more details.
# frozen_string_literal: true

require 'contrast/agent/reporting/input_analysis/input_type'
require 'contrast/agent/reporting/input_analysis/score_level'
require 'contrast/agent/reporting/input_analysis/input_analysis'
require 'contrast/agent/protect/rule/bot_blocker/bot_blocker'
require 'contrast/agent/protect/rule/bot_blocker/bot_blocker_input_classification'
require 'contrast/agent/protect/rule/cmdi/cmdi_input_classification'
require 'contrast/agent/protect/rule/no_sqli/no_sqli'
require 'contrast/agent/protect/rule/no_sqli/no_sqli_input_classification'
require 'contrast/agent/protect/rule/sqli/sqli_input_classification'
require 'contrast/agent/protect/rule/unsafe_file_upload/unsafe_file_upload_input_classification'
require 'contrast/agent/protect/rule/unsafe_file_upload/unsafe_file_upload'
require 'contrast/agent/protect/rule/path_traversal/path_traversal'
require 'contrast/agent/protect/rule/path_traversal/path_traversal_input_classification'
require 'contrast/agent/protect/rule/input_classification/lru_cache'
require 'contrast/agent/protect/rule/input_classification/cached_result'
require 'contrast/agent/protect/rule/xss/reflected_xss_input_classification'
require 'contrast/agent/protect/rule/xss/xss'
require 'contrast/components/logger'
require 'contrast/utils/object_share'
require 'contrast/agent/protect/rule/input_classification/base64_statistic'
require 'json'

module Contrast
  module Agent
    module Protect
      # InputAnalyzer will extract input from the current request context and will analyze it.
      # This will be used for the SQLI and CMDI worth_watching_v2 implementations.
      module InputAnalyzer
        # Keys used by extract_multipart: DISPOSITION_FILENAME is looked up (as a String) in the
        # parsed multipart data, and both are looked up (as Symbols) in the entry found there.
        DISPOSITION_NAME = 'name'
        DISPOSITION_FILENAME = 'filename'
        # Rule ids classified when input_classification is called with prefilter: true.
        PREFILTER_RULES = %w[bot-blocker unsafe-file-upload reflected-xss].cs__freeze
        # Rule ids classified when input_classification is called with neither prefilter nor
        # postfilter set.
        INFILTER_RULES = %w[
          sql-injection cmd-injection bot-blocker unsafe-file-upload path-traversal
          nosql-injection
        ].cs__freeze
        # Rule ids classified when input_classification is called with postfilter: true
        # (and prefilter not set).
        POSTFILTER_RULES = %w[sql-injection cmd-injection path-traversal nosql-injection].cs__freeze
        # Default value of the interval: argument, which is handed to Timeout.timeout around
        # each classify call.
        AGENTLIB_TIMEOUT = 5.cs__freeze
        # Logged by input_classification_for when the rescued error is a Timeout::Error.
        TIMEOUT_ERROR_MESSAGE = '[AgentLib] Timed out when processing InputAnalysisResult'
        # Logged by input_classification_for for any other rescued StandardError.
        STANDARD_ERROR_MESSAGE = '[InputAnalyzer] Exception raise while doing input analysis:'

        class << self
          include Contrast::Agent::Reporting::InputType
          include Contrast::Agent::Reporting::ScoreLevel
          include Contrast::Utils::ObjectShare
          include Contrast::Components::Logger::InstanceMethods

          # Lazily created cache holding the input analysis result per rule. The instance is
          # built on first use and the same object is handed back afterwards.
          #
          # @return [Contrast::Agent::Protect::Rule::InputClassification::LRUCache]
          def lru_cache
            return @_lru_cache if @_lru_cache

            @_lru_cache = Contrast::Agent::Protect::Rule::InputClassification::LRUCache.new
          end

          # Input decoding statistic. Created on first use; later calls return the same object.
          #
          # @return [Contrast::Agent::Protect::Rule::InputClassification::Base64Statistic]
          def base64_statistic
            return @_base64_statistic if @_base64_statistic

            @_base64_statistic = Contrast::Agent::Protect::Rule::InputClassification::Base64Statistic.new
          end

          # Build a new InputAnalysis for the current request, carrying the request itself
          # and the user inputs extracted from it.
          #
          # @param request [Contrast::Agent::Request] current request context.
          # @return input_analysis [Contrast::Agent::Reporting::InputAnalysis, nil] nil when Protect is
          #   disabled, when the request is nil, or when extract_inputs returns nothing.
          def analyse request
            return if !Contrast::PROTECT.enabled? || request.nil?

            extracted = extract_inputs(request)
            return unless extracted

            Contrast::Agent::Reporting::InputAnalysis.new.tap do |analysis|
              analysis.request = request
              # Stored on the analysis so they are still available at trigger time.
              analysis.inputs = extracted
            end
          end

          # Collect the user inputs of the request into a Hash keyed by Protect input type.
          # Each key holds one or more user inputs; keys whose value is nil are removed
          # before the Hash is returned.
          #
          # This method is meant to be expanded and modified as other Protect rules and
          # sub-rules need additional inputs.
          #
          # @param request [Contrast::Agent::Request] current request context.
          # @return inputs [Hash<Contrast::Agent::Protect::InputType => user_inputs>]
          def extract_inputs request
            {}.tap do |collected|
              extract_request_inputs(collected, request)
              extract_multipart(collected, request)
              # Drop the input types for which nothing was found.
              collected.compact!
            end
          end

          # Classify the inputs of the given input analysis for a single Protect rule.
          #
          # The rule is skipped (nil is returned) when it is already listed in
          # input_analysis.analysed_rules, when the analysis reports no inputs, or when
          # Contrast::PROTECT has no such rule or the rule is disabled. Otherwise every
          # input type with a non-nil, non-empty value is passed to the rule's
          # classification, each call guarded by its own timeout.
          #
          # Errors are not propagated: a Timeout::Error (or any subclass of it) and any
          # other StandardError are logged with their dedicated message and nil is returned.
          #
          # @param rule_id [String] name of the rule.
          # @param input_analysis [Contrast::Agent::Reporting::InputAnalysis] from analyse method.
          # @param interval [Integer] The timeout (seconds, as taken by Timeout.timeout) for each
          #   AgentLib classify call.
          # @return [Contrast::Agent::Reporting::InputAnalysis, nil] the input analysis, or nil when
          #   the rule was skipped or an error was logged.
          def input_classification_for rule_id, input_analysis, interval: AGENTLIB_TIMEOUT
            return if input_analysis.analysed_rules.include?(rule_id)
            return if input_analysis.no_inputs?
            return unless (protect_rule = Contrast::PROTECT.rule(rule_id)) && protect_rule.enabled?

            input_analysis.inputs.each do |input_type, value|
              # TODO: RUBY-2110 Update the HEADER handling if possible.
              # Analyze only Header values:
              # This may break bot blocker rule:
              # value = value.values if input_type == HEADER
              next if value.nil? || value.empty?

              Timeout.timeout(interval) do
                protect_rule.classification.classify(rule_id, input_type, value, input_analysis)
              end
            end

            input_analysis
          rescue Timeout::Error => e
            # Listed before StandardError so timeouts, including Timeout::Error subclasses,
            # are reported with the timeout message rather than the generic one.
            log_error(rule_id, TIMEOUT_ERROR_MESSAGE, e)
            nil
          rescue StandardError => e
            log_error(rule_id, STANDARD_ERROR_MESSAGE, e, level: :error)
            nil
          end

          # Run input classification for a whole group of rules. The group is chosen by the
          # flags: prefilter rules when prefilter is set, otherwise postfilter rules when
          # postfilter is set, otherwise the infilter rules. Each rule is delegated to
          # input_classification_for, which logs and swallows errors (timeouts included),
          # so one failing rule does not stop the remaining ones.
          #
          # @param input_analysis [Contrast::Agent::Reporting::InputAnalysis] Here we will keep all the results
          #                                                       for each protect rule.
          # @param prefilter [Boolean] flag to set input analysis for prefilter rules only
          # @param postfilter [Boolean] flag to set input analysis for postfilter rules.
          # @param interval [Integer] The timeout determined for the AgentLib analysis to be performed;
          #   defaults to AGENTLIB_TIMEOUT.
          # @return input_analysis [Contrast::Agent::Reporting::InputAnalysis, nil] the analysis that was
          #   passed in, or nil when none was given.
          def input_classification(input_analysis,
                                   prefilter: false,
                                   postfilter: false,
                                   interval: AGENTLIB_TIMEOUT)
            return unless input_analysis

            # Assigned from lowest to highest precedence: prefilter beats postfilter.
            rule_ids = INFILTER_RULES
            rule_ids = POSTFILTER_RULES if postfilter
            rule_ids = PREFILTER_RULES if prefilter

            rule_ids.each do |rule_id|
              input_classification_for(rule_id, input_analysis, interval: interval)
            end
            input_analysis
          end

          private

          # Extract the filename and name of the Content Disposition Header.
          #
          # The request's multipart data is parsed and the entry stored under the 'filename'
          # key is inspected: its :filename value is stored as MULTIPART_FIELD_NAME and its
          # :name value, when present, as MULTIPART_NAME. Nothing is stored when the request
          # has no multipart data or when that entry is missing or is not a Hash (Symbol
          # lookups on nil or on a String would raise).
          #
          # @param inputs [Hash<Contrast::Agent::Protect::InputType => user_inputs>] updated in place.
          # @param request [Contrast::Agent::Request] current request context.
          def extract_multipart inputs, request
            return unless (parsed_data = Rack::Multipart.parse_multipart(request.rack_request.env))

            upload = parsed_data[DISPOSITION_FILENAME]
            # Guard the Symbol lookups below: without this, a request lacking the entry
            # raised NoMethodError on nil and aborted input extraction.
            return unless upload.is_a?(Hash)

            inputs[MULTIPART_FIELD_NAME] = upload[DISPOSITION_FILENAME.to_sym]
            field_name = upload[DISPOSITION_NAME.to_sym]
            inputs[MULTIPART_NAME] = field_name if field_name
          end

          # Copy the body, cookies, headers, HTTP method, parameters and query string of the
          # request into the inputs Hash, each under its Protect input type. Cookies and
          # parameters are split into separate name and value entries.
          #
          # @param inputs [Hash<Contrast::Agent::Protect::InputType => user_inputs>] updated in place.
          # @param request [Contrast::Agent::Request] current request context.
          # @return the query string that was stored under QUERYSTRING.
          def extract_request_inputs inputs, request
            inputs.merge!(
              BODY => request.body,
              COOKIE_NAME => request.cookies.keys,
              COOKIE_VALUE => request.cookies.values,
              HEADER => request.headers,
              METHOD => request.request_method,
              PARAMETER_NAME => request.parameters.keys,
              PARAMETER_VALUE => request.parameters.values,
              QUERYSTRING => request.query_string
            )
            # Hand back the stored query string rather than the whole Hash.
            inputs[QUERYSTRING]
          end

          # Logs an error that occurred during the analysis, together with the rule id.
          # The entry is written with logger.error when level is :error and with
          # logger.warn for any other level.
          #
          # @param rule_id [String] name of the rule.
          # @param message [String] message to log.
          # @param error [StandardError] the rescued error, passed along to the logger.
          # @param level [Symbol] :error (default) or anything else for a warning.
          def log_error rule_id, message, error, level: :error
            details = { rule_id: rule_id, error: error }
            return logger.error(message, **details) if level == :error

            logger.warn(message, **details)
          end
        end
      end
    end
  end
end