# Copyright (c) 2022 Contrast Security, Inc. See https://www.contrastsecurity.com/enduser-terms-0317a for more details.
# frozen_string_literal: true

# The base class used to determine if a user input crosses a token boundary or
# state, indicating a successful attack using SQL or NoSQL Injection.
#
# @deprecated RUBY-356: this class and those that extend it are being phased out
#   in favor of the more performant code in the Service.
class Contrast::Agent::Protect::Rule::DefaultScanner # rubocop:disable Style/ClassAndModuleChildren
  # Potential states
  # :STATE_INSIDE_TOKEN
  # :STATE_INSIDE_NUMBER
  # :STATE_EXPECTING_TOKEN
  # :STATE_INSIDE_DOUBLEQUOTE
  # :STATE_INSIDE_SINGLEQUOTE
  # :STATE_INSIDE_STRING_ESCAPE_BLOCK
  # :STATE_INSIDE_LINE_COMMENT  # inside a comment that will continue to the end of the line
  # :STATE_INSIDE_BLOCK_COMMENT # inside a comment that will end with a closing tag
  # :STATE_SKIP_NEXT_CHARACTER

  # Determine whether the input, located at index within query, spans a token
  # boundary of the query.
  #
  # Boundaries are visited in the order they were recorded. The first boundary
  # greater than index decides the result: if it falls before the end of the
  # input (index + input.length), the input crosses it.
  #
  # @param query [String] the query being inspected
  # @param index [Integer] the offset in query at which input starts
  # @param input [String] the user input found in the query
  # @return [Array<Integer>, nil] [last_boundary, boundary] when a boundary is
  #   crossed, where last_boundary is the previously visited boundary (0 if
  #   there was none); nil otherwise
  def crosses_boundary query, index, input
    last_boundary = 0
    token_boundaries(query).each do |boundary|
      if boundary > index
        return last_boundary, boundary if boundary < index + input.length

        # the first boundary past the start of the input is already beyond
        # its end, so no later boundary can fall inside it
        break
      end
      last_boundary = boundary
    end
    nil
  end

  # The token boundaries of the given query, cached so that repeated calls
  # with the same query do not rescan it.
  #
  # The cache is keyed on the query: a call with a different query rescans
  # rather than returning the boundaries computed for an earlier one.
  #
  # @param query [String, nil] the query to scan
  # @return [Array<Integer>] the boundary indexes found by
  #   #scan_token_boundaries
  def token_boundaries query
    return @_token_boundaries if @_token_boundaries && @_token_boundaries_query == query

    @_token_boundaries_query = query
    @_token_boundaries = scan_token_boundaries(query)
  end

  # Walk the query one character at a time, tracking the scanner state and
  # recording the index of every token boundary encountered.
  #
  # @param query [String, nil] the query to scan
  # @return [Array<Integer>] the recorded boundary indexes; empty if the query
  #   is nil or empty
  def scan_token_boundaries query
    boundaries = []
    return boundaries unless query && !query.empty?

    state = :STATE_EXPECTING_TOKEN
    index = 0

    while index < query.length
      char = query[index]

      previous_state = state

      state = process_state(boundaries, state, char, index, query)

      # The states below are transient: they consume extra characters here and
      # then hand control back to the state that was active before them.
      case state
      when :STATE_SKIP_NEXT_CHARACTER
        index += 1
        state = previous_state
      when :STATE_INSIDE_STRING_ESCAPE_BLOCK
        index = find_escape_sequence_boundary(query, index + 1)
        state = previous_state
      when :STATE_INSIDE_BLOCK_COMMENT
        # skip the two character opening tag, then search for the closing tag
        index = find_block_comment_boundary(query, index + 2)
        # step onto the second character of the closing tag
        index += 1
        state = previous_state
        boundaries << index
      when :STATE_INSIDE_LINE_COMMENT
        index = find_new_line_boundary(query, index + 1)
        state = previous_state
        boundaries << index
      end
      index += 1
    end
    boundaries
  end

  # Dispatch the current character to the handler for the current state.
  #
  # @param boundaries [Array<Integer>] the boundaries found so far; handlers
  #   append to it
  # @param current_state [Symbol] the state the scanner is in
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Symbol, nil] the next state; nil if current_state has no handler
  def process_state boundaries, current_state, char, index, query
    case current_state
    when :STATE_EXPECTING_TOKEN
      process_expecting_token(boundaries, char, index, query)
    when :STATE_INSIDE_NUMBER
      process_number(boundaries, char, index, query)
    when :STATE_INSIDE_TOKEN
      process_inside_token(boundaries, char, index, query)
    when :STATE_INSIDE_DOUBLEQUOTE
      process_double_quote(boundaries, char, index, query)
    when :STATE_INSIDE_SINGLEQUOTE
      process_single_quote(boundaries, char, index, query)
    end
  end

  # Handle a character seen while waiting for the next token to start. A
  # quote, digit, comment start, or any character matching
  # NOT_WHITE_SPACE_REGEXP records a boundary at index and moves to the
  # corresponding state; anything else keeps waiting.
  #
  # @param boundaries [Array<Integer>] the boundaries found so far
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Symbol] the next state
  def process_expecting_token boundaries, char, index, query
    if char == Contrast::Utils::ObjectShare::SINGLE_QUOTE
      boundaries << index
      :STATE_INSIDE_SINGLEQUOTE
    elsif char == Contrast::Utils::ObjectShare::DOUBLE_QUOTE
      boundaries << index
      :STATE_INSIDE_DOUBLEQUOTE
    elsif char.match?(Contrast::Utils::ObjectShare::DIGIT_REGEXP)
      boundaries << index
      :STATE_INSIDE_NUMBER
    elsif start_line_comment?(char, index, query)
      boundaries << index
      :STATE_INSIDE_LINE_COMMENT
    elsif start_block_comment?(char, index, query)
      boundaries << index
      :STATE_INSIDE_BLOCK_COMMENT
    elsif char.match?(Contrast::Utils::ObjectShare::NOT_WHITE_SPACE_REGEXP)
      boundaries << index
      :STATE_INSIDE_TOKEN
    else
      :STATE_EXPECTING_TOKEN
    end
  end

  # Handle a character seen while inside a token. A quote, comment start,
  # operator, or character matching WHITE_SPACE_REGEXP ends the token and
  # records a boundary at index; anything else stays inside the token.
  #
  # @param boundaries [Array<Integer>] the boundaries found so far
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Symbol] the next state
  def process_inside_token boundaries, char, index, query
    if char == Contrast::Utils::ObjectShare::SINGLE_QUOTE
      boundaries << index
      :STATE_INSIDE_SINGLEQUOTE
    elsif char == Contrast::Utils::ObjectShare::DOUBLE_QUOTE
      boundaries << index
      :STATE_INSIDE_DOUBLEQUOTE
    elsif start_line_comment?(char, index, query)
      boundaries << index
      :STATE_INSIDE_LINE_COMMENT
    elsif start_block_comment?(char, index, query)
      boundaries << index
      :STATE_INSIDE_BLOCK_COMMENT
    elsif operator?(char) || char.match?(Contrast::Utils::ObjectShare::WHITE_SPACE_REGEXP)
      boundaries << index
      :STATE_EXPECTING_TOKEN
    else
      :STATE_INSIDE_TOKEN
    end
  end

  # Handle a character seen while inside a number. Digits and periods keep
  # the scanner in the number; any other character records a boundary at
  # index and ends it.
  #
  # @param boundaries [Array<Integer>] the boundaries found so far
  # @param char [String] the character at index
  # @param index [Integer] the position of char in the query
  # @param _query [String] unused
  # @return [Symbol] the next state
  def process_number boundaries, char, index, _query
    if char.match?(Contrast::Utils::ObjectShare::DIGIT_REGEXP) || char == Contrast::Utils::ObjectShare::PERIOD
      :STATE_INSIDE_NUMBER
    else
      boundaries << index
      :STATE_EXPECTING_TOKEN
    end
  end

  # Handle a character seen while inside a double quoted string. The closing
  # quote records a boundary at index, unless doubled quotes are treated as
  # an escape and the next character is also a double quote.
  #
  # @param boundaries [Array<Integer>] the boundaries found so far
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Symbol] the next state
  def process_double_quote boundaries, char, index, query
    if escape_char?(char)
      :STATE_SKIP_NEXT_CHARACTER
    elsif escape_sequence_start?(char)
      :STATE_INSIDE_STRING_ESCAPE_BLOCK
    elsif char == Contrast::Utils::ObjectShare::DOUBLE_QUOTE
      if double_quote_escape_in_double_quote? && double_quote?(query, index + 1)
        :STATE_SKIP_NEXT_CHARACTER
      else
        boundaries << index
        :STATE_EXPECTING_TOKEN
      end
    else
      :STATE_INSIDE_DOUBLEQUOTE
    end
  end

  # Handle a character seen while inside a single quoted string. The closing
  # quote records a boundary at index, unless doubled quotes are treated as
  # an escape and the next character is also a single quote.
  #
  # @param boundaries [Array<Integer>] the boundaries found so far
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Symbol] the next state
  def process_single_quote boundaries, char, index, query
    if escape_char?(char)
      :STATE_SKIP_NEXT_CHARACTER
    elsif escape_sequence_start?(char)
      :STATE_INSIDE_STRING_ESCAPE_BLOCK
    elsif char == Contrast::Utils::ObjectShare::SINGLE_QUOTE
      if singe_quote_escape_in_singe_quote? && single_quote?(query, index + 1)
        :STATE_SKIP_NEXT_CHARACTER
      else
        boundaries << index
        :STATE_EXPECTING_TOKEN
      end
    else
      :STATE_INSIDE_SINGLEQUOTE
    end
  end

  # Is the character at index in the query a double quote?
  #
  # @param query [String] the query being scanned
  # @param index [Integer] the position to check
  # @return [Boolean] false if index is outside of the query
  def double_quote? query, index
    return false unless index >= 0 && index < query.length

    query[index] == Contrast::Utils::ObjectShare::DOUBLE_QUOTE
  end

  # Is the character at index in the query a single quote?
  #
  # @param query [String] the query being scanned
  # @param index [Integer] the position to check
  # @return [Boolean] false if index is outside of the query
  def single_quote? query, index
    return false unless index >= 0 && index < query.length

    query[index] == Contrast::Utils::ObjectShare::SINGLE_QUOTE
  end

  # Find the position of the character that ends the string escape sequence,
  # searching forward from index.
  #
  # @param query [String] the query being scanned
  # @param index [Integer] the position at which to start searching
  # @return [Integer] the position of the ending character, or query.length
  #   if none is found
  def find_escape_sequence_boundary query, index
    idx = index
    while idx < query.length
      char = query[idx]
      break if escape_sequence_end?(char)

      idx += 1
    end
    idx
  end

  # Find the position at which the block comment closing tag starts,
  # searching forward from index.
  #
  # @param query [String] the query being scanned
  # @param index [Integer] the position at which to start searching
  # @return [Integer] the position of the first character of the closing tag,
  #   or query.length if the comment is never closed
  def find_block_comment_boundary query, index
    idx = index
    while idx < query.length
      char = query[idx]
      break if end_block_comment?(char, idx, query)

      idx += 1
    end
    idx
  end

  # Find the position of the next new line or return character, searching
  # forward from index.
  #
  # @param query [String] the query being scanned
  # @param index [Integer] the position at which to start searching
  # @return [Integer] the position of the line ending character, or
  #   query.length if none is found
  def find_new_line_boundary query, index
    idx = index
    while idx < query.length
      char = query[idx]
      break if char == Contrast::Utils::ObjectShare::NEW_LINE
      break if char == Contrast::Utils::ObjectShare::RETURN

      idx += 1
    end
    idx
  end

  # Characters that end a token in the same way whitespace does.
  OPERATOR_PATTERN = %r{[+=*^/%><!-]}.cs__freeze

  # Is the character one of the operators in OPERATOR_PATTERN?
  #
  # @param char [String] the character to check
  # @return [Boolean]
  def operator? char
    char.match?(OPERATOR_PATTERN)
  end

  # @note: Any class extending this module should override these methods as needed
  # Are the current and subsequent characters both '-' ?
  #
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Boolean] false if there is no subsequent character
  def start_line_comment? char, index, query
    return false unless char == Contrast::Utils::ObjectShare::DASH
    return false unless (query.length - 2) >= index

    query[index + 1] == Contrast::Utils::ObjectShare::DASH
  end

  # Is the current character / sequence of characters the start of a block comment
  # We assume '/*' starts the comment by default
  #
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Boolean] false if there is no subsequent character
  def start_block_comment? char, index, query
    return false unless char == Contrast::Utils::ObjectShare::SLASH
    return false unless (query.length - 2) >= index

    query[index + 1] == Contrast::Utils::ObjectShare::ASTERISK
  end

  # Is the current character / sequence of characters the end of a block comment
  # We assume '*/' ends the comment by default
  #
  # @param char [String] the character at index
  # @param index [Integer] the position of char in query
  # @param query [String] the query being scanned
  # @return [Boolean] false if there is no subsequent character
  def end_block_comment? char, index, query
    return false unless char == Contrast::Utils::ObjectShare::ASTERISK
    return false unless (query.length - 2) >= index

    query[index + 1] == Contrast::Utils::ObjectShare::SLASH
  end

  # Indicates if '""' inside of double quotes is the equivalent of '\"'
  # We assume no by default
  #
  # @return [Boolean]
  def double_quote_escape_in_double_quote?
    false
  end

  # Indicates if "''" inside of single quotes is the equivalent of "\'"
  # We assume yes by default
  #
  # The spelling of the method name is kept as is, since extending classes
  # override it by this name.
  #
  # @return [Boolean]
  def singe_quote_escape_in_singe_quote?
    true
  end

  # Is the character provided an escape character?
  # By default, we assume the escape character is
  # Contrast::Utils::ObjectShare::BACK_SLASH
  #
  # @param char [String] the character to check
  # @return [Boolean]
  def escape_char? char
    char == Contrast::Utils::ObjectShare::BACK_SLASH
  end

  # Is this the start of a string escape sequence?
  # Since escape sequences aren't supported, the answer is always false
  #
  # @param _char [String] unused
  # @return [Boolean]
  def escape_sequence_start? _char
    false
  end

  # Is this the end of a string escape sequence?
  # Since escape sequences aren't supported, the answer is always false
  #
  # @param _char [String] unused
  # @return [Boolean]
  def escape_sequence_end? _char
    false
  end
end