# Copyright (c) 2022 Contrast Security, Inc. See https://www.contrastsecurity.com/enduser-terms-0317a for more details.
# frozen_string_literal: true

module Contrast
  module Agent
    module Protect
      module Rule
        class Xxe
          # A holder for the external entity which was determined to be an
          # attack. It extracts the system or public identifier from the text of
          # an XML entity declaration and decides whether that identifier points
          # at an external resource.
          class EntityWrapper
            # @return [String, nil] the URI of a SYSTEM entity, if one was found
            # @return [String, nil] the URI of a PUBLIC entity; only parsed when
            #   no system id was found
            attr_reader :system_id, :public_id

            # @param entity [String] the text of an XML entity declaration, e.g.
            #   <!ENTITY name SYSTEM "URI">
            def initialize entity
              @system_id = parse_system_id(entity)
              # an entity cannot be system and public
              @public_id = parse_public_id(entity) unless @system_id
            end

            # Determine, and remember, if the wrapped entity refers to an
            # external resource.
            #
            # @return [Boolean]
            def external_entity?
              # explicit nil check, rather than ||=, so that a memoized false is
              # not recomputed on every call. external_id? already answers false
              # when handed nil, so no separate "neither id present" branch is
              # needed.
              @_external_entity = external_id?(@system_id || @public_id) if @_external_entity.nil?
              @_external_entity
            end

            # <!ENTITY name SYSTEM "URI">
            # <!ENTITY % name SYSTEM 'URI' >
            #
            # Follows the XML 1.0 EntityDecl grammar: the name may be preceded
            # by the parameter entity marker (%), may contain any non-whitespace
            # name character (including - and .), the literal may be quoted with
            # either " or ', and whitespace may precede the closing >.
            SYSTEM_ID_REGEXP = /
              <!ENTITY\s+(?:%\s+)?(?<name>\S+)\s+
              SYSTEM\s+
              (?<quote>["'])(?<id>.*?)\k<quote>
              \s*>
            /x.cs__freeze

            # @param entity [String] the text of an XML entity declaration
            # @return [String, nil] the system literal of the declaration, or
            #   nil if the text is not a SYSTEM entity declaration
            def parse_system_id entity
              match = SYSTEM_ID_REGEXP.match(entity)
              match[:id] if match
            end

            # <!ENTITY name PUBLIC "public_ID" "URI">
            # <!ENTITY % name PUBLIC 'public_ID' 'URI' >
            #
            # Same allowances as SYSTEM_ID_REGEXP. The public identifier and the
            # URI are quoted independently, so each tracks its own quote
            # character.
            PUBLIC_ID_REGEXP = /
              <!ENTITY\s+(?:%\s+)?(?<name>\S+)\s+
              PUBLIC\s+
              (?<pubid_quote>["']).*?\k<pubid_quote>\s+
              (?<quote>["'])(?<id>.*?)\k<quote>
              \s*>
            /x.cs__freeze

            # @param entity [String] the text of an XML entity declaration
            # @return [String, nil] the URI (second literal) of the declaration,
            #   or nil if the text is not a PUBLIC entity declaration
            def parse_public_id entity
              match = PUBLIC_ID_REGEXP.match(entity)
              match[:id] if match
            end

            DTD_MARKER =   '.dtd'
            FILE_START =   'file:'
            FTP_START =    'ftp:'
            GOPHER_START = 'gopher:'
            JAR_START =    'jar:'
            UP_DIR_LINUX = '../'
            UP_DIR_WIN =   '..\\'
            # we only use this against lowercase strings, removed A-Z for speed
            FILE_PATTERN_WINDOWS = /^\\*[a-z]{1,3}:.*/.cs__freeze

            # Decide if the given identifier refers to a resource outside of
            # the document. The checks run against a lowercased copy of the
            # identifier, from most to least specific.
            #
            # @param entity_id [String, nil] the system or public id to check
            # @return [Boolean] false when no id is given or no check matches
            def external_id? entity_id
              return false unless entity_id

              # downcase this since we don't have an ignore case compare
              tmp_id = entity_id.to_s.downcase

              # external if http(s) and not a dtd file
              http = tmp_id.start_with?(Contrast::Utils::ObjectShare::HTTP_START,
                                        Contrast::Utils::ObjectShare::HTTPS_START)
              return true if http && !tmp_id.end_with?(DTD_MARKER)

              # external if using external protocol
              return true if tmp_id.start_with?(FTP_START, FILE_START, JAR_START, GOPHER_START)

              # external if start with path marker (/ or .)
              return true if tmp_id.start_with?(Contrast::Utils::ObjectShare::SLASH,
                                                Contrast::Utils::ObjectShare::PERIOD)

              # external if start with path up marker (../ or ..\)
              return true if tmp_id.start_with?(UP_DIR_LINUX, UP_DIR_WIN)

              # external if matches windows file pattern: optional leading
              # backslashes, then one to three letters and a colon (e.g. c:)
              tmp_id.match?(FILE_PATTERN_WINDOWS)
            end
          end
        end
      end
    end
  end
end