# frozen_string_literal: true require "strscan" module GraphQL module Language module Lexer IDENTIFIER = /[_A-Za-z][_0-9A-Za-z]*/ NEWLINE = /[\c\r\n]/ BLANK = /[, \t]+/ COMMENT = /#[^\n\r]*/ INT = /[-]?(?:[0]|[1-9][0-9]*)/ FLOAT_DECIMAL = /[.][0-9]+/ FLOAT_EXP = /[eE][+-]?[0-9]+/ FLOAT = /#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})/ module Literals ON = /on\b/ FRAGMENT = /fragment\b/ TRUE = /true\b/ FALSE = /false\b/ NULL = /null\b/ QUERY = /query\b/ MUTATION = /mutation\b/ SUBSCRIPTION = /subscription\b/ SCHEMA = /schema\b/ SCALAR = /scalar\b/ TYPE = /type\b/ EXTEND = /extend\b/ IMPLEMENTS = /implements\b/ INTERFACE = /interface\b/ UNION = /union\b/ ENUM = /enum\b/ INPUT = /input\b/ DIRECTIVE = /directive\b/ REPEATABLE = /repeatable\b/ LCURLY = '{' RCURLY = '}' LPAREN = '(' RPAREN = ')' LBRACKET = '[' RBRACKET = ']' COLON = ':' VAR_SIGN = '$' DIR_SIGN = '@' ELLIPSIS = '...' EQUALS = '=' BANG = '!' PIPE = '|' AMP = '&' end include Literals QUOTE = '"' UNICODE_DIGIT = /[0-9A-Za-z]/ FOUR_DIGIT_UNICODE = /#{UNICODE_DIGIT}{4}/ N_DIGIT_UNICODE = %r{#{LCURLY}#{UNICODE_DIGIT}{4,}#{RCURLY}}x UNICODE_ESCAPE = %r{\\u(?:#{FOUR_DIGIT_UNICODE}|#{N_DIGIT_UNICODE})} # # https://graphql.github.io/graphql-spec/June2018/#sec-String-Value STRING_ESCAPE = %r{[\\][\\/bfnrt]} BLOCK_QUOTE = '"""' ESCAPED_QUOTE = /\\"/; STRING_CHAR = /#{ESCAPED_QUOTE}|[^"\\]|#{UNICODE_ESCAPE}|#{STRING_ESCAPE}/ LIT_NAME_LUT = Literals.constants.each_with_object({}) { |n, o| key = Literals.const_get(n) key = key.is_a?(Regexp) ? key.source.gsub(/(\\b|\\)/, '') : key o[key] = n } LIT = Regexp.union(Literals.constants.map { |n| Literals.const_get(n) }) QUOTED_STRING = %r{#{QUOTE} (?:#{STRING_CHAR})* #{QUOTE}}x BLOCK_STRING = %r{ #{BLOCK_QUOTE} (?: [^"\\] | # Any characters that aren't a quote or slash (?= 0xD800 && codepoint_1 <= 0xDBFF) && # leading surrogate (codepoint_2 >= 0xDC00 && codepoint_2 <= 0xDFFF) # trailing surrogate # A surrogate pair combined = ((codepoint_1 - 0xD800) * 0x400) + (codepoint_2 - 0xDC00) + 0x10000 [combined].pack('U'.freeze) else # Two separate code points [codepoint_1].pack('U'.freeze) + [codepoint_2].pack('U'.freeze) end else [codepoint_1].pack('U'.freeze) end end nil end def self.record_comment(ts, te, meta, str) token = [ :COMMENT, meta[:line], meta[:col], str, meta[:previous_token], ] meta[:previous_token] = token meta[:col] += te - ts end ESCAPES = /\\["\\\/bfnrt]/ ESCAPES_REPLACE = { '\\"' => '"', "\\\\" => "\\", "\\/" => '/', "\\b" => "\b", "\\f" => "\f", "\\n" => "\n", "\\r" => "\r", "\\t" => "\t", } UTF_8 = /\\u(?:([\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))?/i VALID_STRING = /\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o def self.emit_block(ts, te, meta, value) line_incr = value.count("\n") value = GraphQL::Language::BlockString.trim_whitespace(value) emit_string(ts, te, meta, value) meta[:line] += line_incr end def self.emit_string(ts, te, meta, value) if !value.valid_encoding? || !value.match?(VALID_STRING) emit(:BAD_UNICODE_ESCAPE, ts, te, meta, value) else replace_escaped_characters_in_place(value) if !value.valid_encoding? emit(:BAD_UNICODE_ESCAPE, ts, te, meta, value) else emit(:STRING, ts, te, meta, value) end end end end end end