# Copyright (c) 2023 Contrast Security, Inc. See https://www.contrastsecurity.com/enduser-terms-0317a for more details.
# frozen_string_literal: true

module Contrast
  module Agent
    module Assess
      module Policy
        module TriggerValidation
          # Validator used to assert that a REDOS finding is actually vulnerable
          # before serializing that finding as an Event to report to the TeamServer.
          module REDOSValidator
            # Identifier of the rule this validator belongs to.
            RULE_NAME = 'redos'
            # Value a Regexp timeout reports once it has been set to Float::INFINITY; this is the
            # largest number the timeout will hold.
            # NOTE(review): looks like 2**64 nanoseconds expressed in seconds - confirm.
            POSITIVE_INFINITY = 18_446_744_073.709553
            # Negated counterpart, so a timeout of negative infinity (-1.0/0.0) is checked for as well.
            NEGATIVE_INFINITY = -POSITIVE_INFINITY

            class << self
              def valid? _patcher, object, _ret, args
                # The trigger fires for any of
                #   regexp =~ string
                #   string =~ regexp
                #   regexp.match string
                # so the Regexp is either the receiver or the first argument. The patcher and the
                # return value play no part in the decision.
                candidate = if object.is_a?(Regexp)
                              object
                            else
                              args[0]
                            end

                # Only an exploitable regexp makes the finding valid.
                regexp_vulnerable?(candidate) ? true : false
              end

              protected

              # Pattern run against a Regexp's source to spot two levels of nested multi-matching:
              # an opening `[` or `(`, a second opening `[` or `(`, a closing `]` or `)` directly
              # followed by `*`, `+` or `?`, and further on another closing `]` or `)` directly
              # followed by one of those characters. Everything in between is matched lazily.
              # The pattern itself does not look for an escaping backslash in front of these characters.
              # NOTE(review): cs__freeze is defined outside this file - presumably the agent's alias
              # of #freeze; confirm.
              VULNERABLE_PATTERN = /[\[(].*?[\[(].*?[\])][*+?].*?[\])][*+?]/.cs__freeze

              # Determine whether the regexp is exploitable according to the REDOS rule specification:
              # https://bitbucket.org/contrastsecurity/assess-specifications/src/master/rules/dataflow/redos.md
              def regexp_vulnerable? regexp
                # The rule calls a pattern vulnerable when it has 2 or more levels of nested
                # multi-matching. A level is a set of opening and closing control characters with a
                # multi-match control character immediately after it. The rule's control characters are
                # the opening ( and [, the closing ) and ], and the multi-match +, * and ?, each counted
                # only when no escaping \ character immediately precedes it.
                #
                # Since Ruby 3.2.0 a Regexp can have a timeout. If one is considered safely set, the
                # regexp is assumed safe and is not reported.
                protected_by_timeout = regexp_timeout_safe?(regexp)

                # The pattern is checked against Regexp#source, which is the original source text.
                # Regexp#to_s only yields a functionally equivalent Regexp (not necessarily the original
                # one) and Regexp#inspect a more nicely formatted version.
                # #match? is used because it does not fill out the global variables that #match and =~ do.
                !protected_by_timeout && VULNERABLE_PATTERN.match?(regexp.source)
              end

              # Check whether a Regexp timeout is safely set for the given regexp.
              #
              # @param regexp [Regexp]
              # @return [Boolean]
              def regexp_timeout_safe? regexp
                # Regexp timeouts exist since Ruby 3.2. Detect the feature itself rather than comparing
                # version strings, which compare lexicographically and not numerically.
                return false unless Regexp.respond_to?(:timeout)

                # A timeout set on the instance overrides the global Regexp.timeout, so the instance
                # value decides whenever it is present. Anything that is not a Regexp, or a Regexp
                # without a timeout of its own, is governed by the global setting.
                own_timeout = regexp.cs__is_a?(Regexp) && !regexp.timeout.nil?
                timeout_holder = own_timeout ? regexp : Regexp

                # No timeout in effect at all: nothing bounds the match.
                return false if timeout_holder.timeout.nil?

                # A timeout in effect is only safe when it is not the infinite one.
                !regexp_infinite?(timeout_holder)
              end

              private

              # Check whether the configured timeout equals the value used for an infinite timeout.
              #
              # @param regexp [Regexp, Class] a Regexp instance or the Regexp class itself
              # @return [Boolean]
              def regexp_infinite? regexp
                # Works for an instance and for the Regexp class alike: both respond to #timeout.
                limit = regexp.timeout

                # Infinite means equal to either of the two infinity sentinels.
                limit == POSITIVE_INFINITY || limit == NEGATIVE_INFINITY
              end
            end
          end
        end
      end
    end
  end
end