require 'strscan' module JSON module Pure # This class implements the JSON parser that is used to parse a JSON string # into a Ruby data structure. class Parser < StringScanner STRING = /" ((?:[^\x0-\x1f"\\] | # escaped special characters: \\["\\\/bfnrt] | \\u[0-9a-fA-F]{4} | # match all but escaped special characters: \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*) "/nx INTEGER = /(-?0|-?[1-9]\d*)/ FLOAT = /(-? (?:0|[1-9]\d*) (?: \.\d+(?i:e[+-]?\d+) | \.\d+ | (?i:e[+-]?\d+) ) )/x NAN = /NaN/ INFINITY = /Infinity/ MINUS_INFINITY = /-Infinity/ OBJECT_OPEN = /\{/ OBJECT_CLOSE = /\}/ ARRAY_OPEN = /\[/ ARRAY_CLOSE = /\]/ PAIR_DELIMITER = /:/ COLLECTION_DELIMITER = /,/ TRUE = /true/ FALSE = /false/ NULL = /null/ IGNORE = %r( (?: //[^\n\r]*[\n\r]| # line comments /\* # c-style comments (?: [^*/]| # normal chars /[^*]| # slashes that do not start a nested comment \*[^/]| # asterisks that do not end this comment /(?=\*/) # single slash before this comment's end )* \*/ # the End of this comment |[ \t\r\n]+ # whitespaces: space, horicontal tab, lf, cr )+ )mx UNPARSED = Object.new # Creates a new JSON::Pure::Parser instance for the string _source_. # # It will be configured by the _opts_ hash. _opts_ can have the following # keys: # * *max_nesting*: The maximum depth of nesting allowed in the parsed data # structures. Disable depth checking with :max_nesting => false|nil|0, # it defaults to 19. # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in # defiance of RFC 4627 to be parsed by the Parser. This option defaults # to false. # * *symbolize_names*: If set to true, returns symbols for the names # (keys) in a JSON object. Otherwise strings are returned, which is also # the default. # * *create_additions*: If set to true, the Parser creates # additions when if a matching class and create_id was found. This # option defaults to false. # * *object_class*: Defaults to Hash # * *array_class*: Defaults to Array # * *quirks_mode*: Enables quirks_mode for parser, that is for example # parsing single JSON values instead of documents is possible. def initialize(source, opts = {}) opts ||= {} unless @quirks_mode = opts[:quirks_mode] source = determine_encoding source end super source if !opts.key?(:max_nesting) # defaults to 19 @max_nesting = 19 elsif opts[:max_nesting] @max_nesting = opts[:max_nesting] else @max_nesting = 0 end @allow_nan = !!opts[:allow_nan] @symbolize_names = !!opts[:symbolize_names] if opts.key?(:create_additions) @create_additions = !!opts[:create_additions] else @create_additions = false end @create_id = @create_additions ? JSON.create_id : nil @object_class = opts[:object_class] || Hash @array_class = opts[:array_class] || Array @match_string = opts[:match_string] end alias source string def quirks_mode? !!@quirks_mode end def reset super @current_nesting = 0 end # Parses the current JSON string _source_ and returns the complete data # structure as a result. def parse reset obj = nil if @quirks_mode while !eos? && skip(IGNORE) end if eos? raise ParserError, "source did not contain any JSON!" else obj = parse_value obj == UNPARSED and raise ParserError, "source did not contain any JSON!" end else until eos? case when scan(OBJECT_OPEN) obj and raise ParserError, "source '#{peek(20)}' not in JSON!" @current_nesting = 1 obj = parse_object when scan(ARRAY_OPEN) obj and raise ParserError, "source '#{peek(20)}' not in JSON!" @current_nesting = 1 obj = parse_array when skip(IGNORE) ; else raise ParserError, "source '#{peek(20)}' not in JSON!" end end obj or raise ParserError, "source did not contain any JSON!" end obj end private def determine_encoding(source) if defined?(::Encoding) if source.encoding == ::Encoding::ASCII_8BIT b = source[0, 4].bytes.to_a source = case when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0 source.dup.force_encoding(::Encoding::UTF_32BE).encode!(::Encoding::UTF_8) when b.size >= 4 && b[0] == 0 && b[2] == 0 source.dup.force_encoding(::Encoding::UTF_16BE).encode!(::Encoding::UTF_8) when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0 source.dup.force_encoding(::Encoding::UTF_32LE).encode!(::Encoding::UTF_8) when b.size >= 4 && b[1] == 0 && b[3] == 0 source.dup.force_encoding(::Encoding::UTF_16LE).encode!(::Encoding::UTF_8) else source.dup end else source = source.encode(::Encoding::UTF_8) end source.force_encoding(::Encoding::ASCII_8BIT) else b = source source = case when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0 JSON.iconv('utf-8', 'utf-32be', b) when b.size >= 4 && b[0] == 0 && b[2] == 0 JSON.iconv('utf-8', 'utf-16be', b) when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0 JSON.iconv('utf-8', 'utf-32le', b) when b.size >= 4 && b[1] == 0 && b[3] == 0 JSON.iconv('utf-8', 'utf-16le', b) else b end end source end # Unescape characters in strings. UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr } UNESCAPE_MAP.update({ ?" => '"', ?\\ => '\\', ?/ => '/', ?b => "\b", ?f => "\f", ?n => "\n", ?r => "\r", ?t => "\t", ?u => nil, }) EMPTY_8BIT_STRING = '' if ::String.method_defined?(:encode) EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT end def parse_string if scan(STRING) return '' if self[1].empty? string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c| if u = UNESCAPE_MAP[$&[1]] u else # \uXXXX bytes = EMPTY_8BIT_STRING.dup i = 0 while c[6 * i] == ?\\ && c[6 * i + 1] == ?u bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16) i += 1 end JSON.iconv('utf-8', 'utf-16be', bytes) end end if string.respond_to?(:force_encoding) string.force_encoding(::Encoding::UTF_8) end if @create_additions and @match_string for (regexp, klass) in @match_string klass.json_creatable? or next string =~ regexp and return klass.json_create(string) end end string else UNPARSED end rescue => e raise ParserError, "Caught #{e.class} at '#{peek(20)}': #{e}" end def parse_value case when scan(FLOAT) Float(self[1]) when scan(INTEGER) Integer(self[1]) when scan(TRUE) true when scan(FALSE) false when scan(NULL) nil when (string = parse_string) != UNPARSED string when scan(ARRAY_OPEN) @current_nesting += 1 ary = parse_array @current_nesting -= 1 ary when scan(OBJECT_OPEN) @current_nesting += 1 obj = parse_object @current_nesting -= 1 obj when @allow_nan && scan(NAN) NaN when @allow_nan && scan(INFINITY) Infinity when @allow_nan && scan(MINUS_INFINITY) MinusInfinity else UNPARSED end end def parse_array raise NestingError, "nesting of #@current_nesting is too deep" if @max_nesting.nonzero? && @current_nesting > @max_nesting result = @array_class.new delim = false until eos? case when (value = parse_value) != UNPARSED delim = false result << value skip(IGNORE) if scan(COLLECTION_DELIMITER) delim = true elsif match?(ARRAY_CLOSE) ; else raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!" end when scan(ARRAY_CLOSE) if delim raise ParserError, "expected next element in array at '#{peek(20)}'!" end break when skip(IGNORE) ; else raise ParserError, "unexpected token in array at '#{peek(20)}'!" end end result end def parse_object raise NestingError, "nesting of #@current_nesting is too deep" if @max_nesting.nonzero? && @current_nesting > @max_nesting result = @object_class.new delim = false until eos? case when (string = parse_string) != UNPARSED skip(IGNORE) unless scan(PAIR_DELIMITER) raise ParserError, "expected ':' in object at '#{peek(20)}'!" end skip(IGNORE) unless (value = parse_value).equal? UNPARSED result[@symbolize_names ? string.to_sym : string] = value delim = false skip(IGNORE) if scan(COLLECTION_DELIMITER) delim = true elsif match?(OBJECT_CLOSE) ; else raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!" end else raise ParserError, "expected value in object at '#{peek(20)}'!" end when scan(OBJECT_CLOSE) if delim raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" end if @create_additions and klassname = result[@create_id] klass = JSON.deep_const_get klassname break unless klass and klass.json_creatable? result = klass.json_create(result) end break when skip(IGNORE) ; else raise ParserError, "unexpected token in object at '#{peek(20)}'!" end end result end end end end