# Copyright (c) 2022 Contrast Security, Inc. See https://www.contrastsecurity.com/enduser-terms-0317a for more details.
# frozen_string_literal: true

require 'contrast/agent/assess/rule/response/header_rule'
require 'contrast/agent/assess/rule/response/body_rule'
require 'contrast/utils/object_share'
require 'contrast/utils/string_utils'
require 'json'

module Contrast
  module Agent
    module Assess
      module Rule
        module Response
          # This rule checks the content of the HTTP Response to determine whether cache controls are missing from, or
          # set incorrectly in, the Cache-Control header or the cache-control meta tags of the body.
          class CacheControl < HeaderRule
            include BodyRule
            # Name(s) of the response header this rule is concerned with; presumably consumed by the HeaderRule
            # parent -- confirm there.
            HEADER_KEYS = %w[Cache-Control].cs__freeze
            # Patterns for the no-store and no-cache directives; not referenced in the visible part of this class,
            # presumably used by the HeaderRule parent to validate the header value -- confirm there.
            ACCEPTED_VALUES = [/no-store/, /no-cache/].cs__freeze
            # Not referenced in the visible part of this class; presumably tells the parent rule that a missing
            # header is not considered safe -- confirm there.
            DEFAULT_SAFE = false
            # Case-insensitive pattern for the start of a <meta element; passed to html_elements when collecting
            # meta tags from the body.
            META_START_STR = /<meta/i.cs__freeze
            # Case-insensitive pattern for the opening <head> tag; used to split the response body.
            HEAD_TAG = /<head>/i.cs__freeze
            # Name recorded in the evidence for header-based findings.
            NAME = 'cache-control'

            # Presumably returns the identifier of this rule.
            # NOTE(review): neither the returned value nor the closing `end` of this method is visible in this
            # view -- confirm the body against the full file.
            def rule_id


            # Determine if the Response violates the Rule or not. If it does, return the evidence that proves it so.
            # @param response [Contrast::Agent::Response] the response of the application
            # @return [Hash, nil] a single-entry Hash mapping DATA to the JSON-encoded Array of evidence that proves
            #   the violation, or nil if the rule is not violated
            def violated? response
              # Cache controls can be declared in two places: the response header and meta tags in the body.
              cache_header = cache_control_from(response)
              cache_meta = cache_meta_tags(response)

              has_header = cache_header && !cache_header.blank?
              has_meta = cache_meta.any?
              # Because we're not safe by default, the rule should never hit this case, but we'll handle it just in
              # case.
              # With neither a header nor a meta tag, the result carries an empty, JSON-encoded evidence list.
              return { DATA => Contrast::Utils::ObjectShare::EMPTY_ARRAY.to_json } unless has_header || has_meta

              evidence = []
              # If we have a header tag, then we need to make sure it is set safely. If it is, there'll be no evidence
              # and we can return as the header prevents violation.
              if has_header
                header_evidence = header_evidence(cache_header)
                # A bare return yields nil, which signals that the rule is not violated.
                return unless header_evidence

                evidence << header_evidence

              # If we have no header, or an unsafe header, then we need to check the meta tag to make sure it is set
              # safely. If it is, there'll be no evidence and we can return as the meta tag prevents violation.
              if has_meta
                tag_evidence = tag_evidence(cache_meta)
                # As above, nil means the meta tags are safe and nothing is reported.
                return unless tag_evidence

                evidence << tag_evidence

              # Otherwise, we'll report the violation.
              # The collected evidence Array is serialized to a JSON String under the DATA key.
              { DATA => evidence.to_json }

            # @param response [Contrast::Agent::Response] the response of the application
            # @return [Array<Hash<String,String>>]
            def cache_meta_tags response
              # The body is split on HEAD_TAG and only the last piece is handed to html_elements, together with
              # META_START_STR; a nil body is passed through as nil. html_elements and HTML_PROP are defined outside
              # this view (presumably in BodyRule -- confirm).
              # Of the elements returned, keep only those that cache_control_tag? recognizes.
              html_elements(response.body&.split(HEAD_TAG)&.last, META_START_STR).
                  select { |tag| cache_control_tag?(tag[HTML_PROP]) }

            # Process Header value to determine if it violates rule
            # @param cache_control [String] the value of the Cache-Control header
            # @return [Hash<String,String>, nil] the evidence hash or nil
            def header_evidence cache_control
              # If header is valid, then this portion of the rule isn't violated.
              # valid_header? is defined outside this view (presumably on HeaderRule -- confirm).
              return if valid_header?(cache_control)

              # evidence requires header value string, pull directly instead of rebuilding from hash
              # HEADER_TYPE is defined outside this view; NAME is this class's 'cache-control' constant.
              evidence(HEADER_TYPE, NAME, cache_control)

            # Process Body to determine if cache control meta tag violates rule
            # @param cache_meta_tags [Array<Hash>] the meta tags which contain Cache-Control values
            # @return [Hash<String,String>, nil] the evidence hash or nil
            def tag_evidence cache_meta_tags
              # Only the first tag that fails safe_meta_cache_tag? is reported.
              violation = cache_meta_tags.find { |tag| !safe_meta_cache_tag?(tag[HTML_PROP]) }
              # nil when every tag is safe. META_TYPE, PRAGMA and HTML_PROP are defined outside this view.
              violation ? evidence(META_TYPE, PRAGMA, violation[HTML_PROP]) : nil

            # NOTE(review): only the signature of this method is visible in this view; its body and closing `end`
            # are not shown, so its behavior cannot be documented from here -- confirm against the full file.
            def potential_elements section, element_start

            # Patterns matching a quoted cache-control value, in single or double quotes and any letter case. Used
            # to recognize the value of a meta tag's http-equiv attribute.
            # @return [Array<Regexp>]
            def accepted_http_values
              [/'cache-control'/i, /"cache-control"/i]

            # Patterns matching the quoted values accepted for a meta tag's content attribute: no-cache, no-store or
            # cache-control, in single or double quotes and any letter case.
            # @return [Array<Regexp>]
            def accepted_values
              [/'no-cache'/i, /"no-cache"/i, /"no-store"/i, /'no-store'/i, /'cache-control'/i, /"cache-control"/i]

            # @param tag [String] the tag to check
            # @return [Boolean] if the tag has cache-control settings or not
            def cache_control_tag? tag
              # Both an http-equiv and a content attribute must be present for the tag to qualify.
              http_equiv_idx = tag =~ /http-equiv=/i
              return false unless http_equiv_idx

              content_idx = tag =~ /content=/i
              return false unless content_idx

              # determine the value of the http-equiv if it's cache-control
              # 11 is the length of 'http-equiv=', so the index now points at the start of the attribute value.
              http_equiv_idx += 11
              # =~ gives the index of the first match, so the first quoted cache-control in the tag must begin
              # immediately after the '='.
              accepted_http_values.any? { |el| (tag =~ el) == http_equiv_idx }

            # Determine if the given meta tag declares a safe cache-control setting.
            # A meta tag can use its http-equiv and content attributes to stand in for an HTTP response header
            # of the document.
            # @param tag [String] the meta tag
            # @return [Boolean] true only if http-equiv is cache-control and content starts with an accepted value
            def safe_meta_cache_tag? tag
              # Here we should determine the index of the needed keys
              # http-equiv and content
              http_equiv_idx = tag =~ /http-equiv=/i
              return false unless http_equiv_idx

              content_idx = tag =~ /content=/i
              return false unless content_idx

              # determine the value of the http-equiv if it's cache-control
              # 11 is the length of 'http-equiv=', so the index now points at the start of the attribute value.
              http_equiv_idx += 11
              is_valid = accepted_http_values.any? { |el| (tag =~ el) == http_equiv_idx }
              return false unless is_valid

              # 8 is the length of 'content=', so the index now points at the start of the content value.
              content_idx += 8
              # The tag is safe only if the first occurrence of some accepted quoted value begins right after the
              # '='; each pattern includes both quotes, so the quoted value must be exactly that directive.
              accepted_values.any? { |value| (tag =~ value) == content_idx }

            # This method accepts the violation and transforms it to the proper hash before returning a violation.
            # Unlike other rules, this returns a complex structure to be converted to JSON on reporting -- do NOT cast
            # it here as that'll result in extra escaping later.
            # @param type [String] the type of the evidence, either Header or META
            # @param name [String] the name of the setting, either cache-control or pragma
            # @param value [String] the value that violates the rule
            # @return [Hash<String, String>]
            def evidence type, name, value
              # Built as a plain Hash with String keys; nothing is serialized here.
              { 'type' => type, 'name' => name, 'value' => value }

            # return the cache control value from the response, either as a Hash in later versions of Rails or as a
            # String in all other frameworks/ response types (remember, response can be a few things).
            # @param response [Contrast::Agent::Response]
            # @return [String]
            def cache_control_from response
              # NOTE(review): the branches of this `if` are not visible in this view, so where `control` is read
              # from cannot be documented here -- confirm against the full file.
              control = if response.rack_response.cs__is_a?(Rack::Response)
              # A Hash value is flattened to its String form; any other value is returned unchanged.
              control.cs__is_a?(Hash) ? cache_control_to_s(control) : control

            # Rebuilds the String value of the Cache-Control Header from the hash built in the Rack::Response
            # @param hsh [Hash]
            # @return [String]
            def cache_control_to_s hsh
              values = []
              hsh.each_pair do |k, v|
                # Keys are rendered with dashes in place of underscores.
                key = k.to_s.tr('_', '-')
                # NOTE(review): the branch bodies and closing `end`s of this conditional are not fully visible in
                # this view -- confirm the handling of :extras and of values other than true against the full file.
                values << if key.to_sym == :extras
                          elsif v.is_a?(TrueClass)
                            "#{ key }=#{ v }"
              # The collected directives are joined with a comma and a space.
              values.join(', ')