module Puppet::Pops
module Parser
# This module is an integral part of the Lexer.
# It defines the string slurping behavior - finding the string and non string parts in interpolated
# strings, translating escape sequences in strings to their single character equivalence.
#
# PERFORMANCE NOTE: The various kinds of slurping could be made even more generic, but requires
# additional parameter passing and evaluation of conditional logic.
# TODO: More detailed performance analysis of excessive character escaping and interpolation.
#
module SlurpSupport
  include LexerSupport

  SLURP_SQ_PATTERN  = /(?:[^\\]|^|[^\\])(?:[\\]{2})*[']/
  SLURP_DQ_PATTERN  = /(?:[^\\]|^|[^\\])(?:[\\]{2})*(["]|[$]\{?)/
  SLURP_UQ_PATTERN  = /(?:[^\\]|^|[^\\])(?:[\\]{2})*([$]\{?|\z)/
  # unquoted, no escapes
  SLURP_UQNE_PATTERN  = /(\$\{?|\z)/m
  SLURP_ALL_PATTERN = /.*(\z)/m
  SQ_ESCAPES = %w{ \\ ' }
  DQ_ESCAPES = %w{ \\  $ ' " r n t s u}+["\r\n", "\n"]
  UQ_ESCAPES = %w{ \\  $ r n t s u}+["\r\n", "\n"]

  def slurp_sqstring
    # skip the leading '
    @scanner.pos += 1
    str = slurp(@scanner, SLURP_SQ_PATTERN, SQ_ESCAPES, :ignore_invalid_escapes)
    lex_error(Issues::UNCLOSED_QUOTE, :after => "\"'\"", :followed_by => followed_by) unless str
    str[0..-2] # strip closing "'" from result
  end

  def slurp_dqstring
    scn = @scanner
    last = scn.matched
    str = slurp(scn, SLURP_DQ_PATTERN, DQ_ESCAPES, false)
    unless str
      lex_error(Issues::UNCLOSED_QUOTE, :after => format_quote(last), :followed_by => followed_by)
    end

    # Terminator may be a single char '"', '$', or two characters '${' group match 1 (scn[1]) from the last slurp holds this
    terminator = scn[1]
    [str[0..(-1 - terminator.length)], terminator]
  end

   # Copy from old lexer - can do much better
   def slurp_uqstring
     scn = @scanner
     last = scn.matched
     ignore = true

     str = slurp(scn, @lexing_context[:uq_slurp_pattern], @lexing_context[:escapes], :ignore_invalid_escapes)

     # Terminator may be a single char '$', two characters '${', or empty string '' at the end of intput.
     # Group match 1 holds this.
     # The exceptional case is found by looking at the subgroup 1 of the most recent match made by the scanner (i.e. @scanner[1]).
     # This is the last match made by the slurp method (having called scan_until on the scanner).
     # If there is a terminating character is must be stripped and returned separately.
     #
     terminator = scn[1]
     [str[0..(-1 - terminator.length)], terminator]
   end

  # Slurps a string from the given scanner until the given pattern and then replaces any escaped
  # characters given by escapes into their control-character equivalent or in case of line breaks, replaces the
  # pattern \r?\n with an empty string.
  # The returned string contains the terminating character. Returns nil if the scanner can not scan until the given
  # pattern.
  #
  def slurp(scanner, pattern, escapes, ignore_invalid_escapes)
    str = scanner.scan_until(pattern) || return

    return str unless str.include?('\\')

    return str.gsub!(/\\(\\|')/m, '\1') || str if escapes.equal?(SQ_ESCAPES)

    # Process unicode escapes first as they require getting 4 hex digits
    # If later a \u is found it is warned not to be a unicode escape
    if escapes.include?('u')
      str.gsub!(/\\u(?:([\da-fA-F]{4})|\{([\da-fA-F]{1,6})\})/m) {
        [($1 || $2).hex].pack("U")
      }
    end

    begin
    str.gsub!(/\\([^\r\n]|(?:\r?\n))/m) {
      ch = $1
      if escapes.include? ch
        case ch
        when 'r'   ; "\r"
        when 'n'   ; "\n"
        when 't'   ; "\t"
        when 's'   ; ' '
        when 'u'
          lex_warning(Issues::ILLEGAL_UNICODE_ESCAPE)
          "\\u"
        when "\n"  ; ''
        when "\r\n"; ''
        else      ch
        end
      else
        lex_warning(Issues::UNRECOGNIZED_ESCAPE, :ch => ch) unless ignore_invalid_escapes
        "\\#{ch}"
      end
    }
    rescue ArgumentError => e
      # A invalid byte sequence may be the result of faulty input as well, but that could not possibly
      # have reached this far... Unfortunately there is no more specific error and a match on message is
      # required to differentiate from other internal problems.
      if e.message =~ /invalid byte sequence/
        lex_error(Issues::ILLEGAL_UNICODE_ESCAPE)
      else
        raise e
      end
    end
    str
  end
end
end
end