module RipperRubyParser
  # Implements string unescaping
  #
  # @api private
  module Unescape
    module_function

    ESCAPE_SEQUENCE_REGEXP =
      /\\(
        [0-7]{1,3}        | # octal character
        x[0-9a-fA-F]{1,2} | # hex byte
        u[0-9a-fA-F]{4}   | # unicode character
        M-\\C-.           | # meta-ctrl
        C-\\M-.           | # ctrl-meta
        M-\\c.            | # meta-ctrl (shorthand)
        c\\M-.            | # ctrl-meta (shorthand)
        C-.               | # control (regular)
        c.                | # control (shorthand)
        M-.               | # meta
        \n                | # line continuation
        .                   # single-character
      )/x

    SINGLE_LETTER_ESCAPES = {
      'a' => "\a",
      'b' => "\b",
      'e' => "\e",
      'f' => "\f",
      'n' => "\n",
      'r' => "\r",
      's' => "\s",
      't' => "\t",
      'v' => "\v"
    }.freeze

    SINGLE_LETTER_ESCAPES_REGEXP =
      Regexp.new("^[#{SINGLE_LETTER_ESCAPES.keys.join}]$")

    def simple_unescape(string)
      string.gsub(/\\(
        '   | # single quote
        \\    # backslash
      )/x) do
        Regexp.last_match[1]
      end
    end

    def simple_unescape_wordlist_word(string)
      string.gsub(/\\(
        '   | # single quote
        \\  | # backslash
        \n    # newline
      )/x) do
        Regexp.last_match[1]
      end
    end

    def unescape(string)
      string.gsub(ESCAPE_SEQUENCE_REGEXP) do
        bare = Regexp.last_match[1]
        if bare == "\n"
          ''
        else
          unescaped_value(bare)
        end
      end
    end

    def unescape_wordlist_word(string)
      string.gsub(ESCAPE_SEQUENCE_REGEXP) do
        bare = Regexp.last_match[1]
        unescaped_value(bare)
      end
    end

    def fix_encoding(string)
      unless string.encoding == Encoding::UTF_8
        dup = string.dup.force_encoding Encoding::UTF_8
        return dup if dup.valid_encoding?
      end
      string
    end

    def unescape_regexp(string)
      string.gsub(/\\(\n|\\)/) do
        bare = Regexp.last_match[1]
        case bare
        when "\n"
          ''
        else
          '\\\\'
        end
      end
    end

    def unescaped_value(bare)
      case bare
      when SINGLE_LETTER_ESCAPES_REGEXP
        SINGLE_LETTER_ESCAPES[bare]
      when /^x/
        bare[1..-1].to_i(16).chr
      when /^u/
        bare[1..-1].to_i(16).chr(Encoding::UTF_8)
      when /^(c|C-).$/
        (bare[-1].ord & 0b1001_1111).chr
      when /^M-.$/
        (bare[-1].ord | 0b1000_0000).chr
      when /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/
        (bare[-1].ord & 0b1001_1111 | 0b1000_0000).chr
      when /^[0-7]+/
        bare.to_i(8).chr
      when "\n"
        bare
      else
        bare
      end
    end
  end
end