# frozen_string_literal: true
# rubocop:todo all
# Copyright (C) 2019-2020 MongoDB Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

require 'json'

module BSON

  # This module contains methods for parsing Extended JSON 2.0.
  # https://github.com/mongodb/specifications/blob/master/source/extended-json.rst
  module ExtJSON

    # Parses JSON in a string into a Ruby object tree.
    #
    # The string is parsed with JSON.parse and the result is handed to
    # {parse_obj}, which replaces extended JSON type hashes with BSON or
    # native Ruby values.
    #
    # The :mode option selects which types are emitted:
    #
    # - nil (the default): $numberLong inputs produce Integer instances.
    # - :bson: $numberLong inputs produce Int64 instances, so that the type
    #   survives serialization back to extended JSON or to BSON.
    #
    # In both modes:
    #
    # 1. $numberInt inputs produce Integer instances.
    # 2. $regularExpression inputs produce BSON Regexp instances. This may
    #    change in a future version of bson-ruby to produce Ruby Regexp
    #    instances, potentially depending on regular expression options.
    # 3. $numberDecimal inputs produce BSON Decimal128 instances. This may
    #    change in a future version of bson-ruby to produce Ruby BigDecimal
    #    instances instead.
    #
    # This method accepts canonical extended JSON, relaxed extended JSON and
    # JSON without type information as well as a mix of the above.
    #
    # @note This method uses Ruby standard library's JSON.parse method to
    #   perform JSON parsing. As the JSON.parse method accepts inputs other
    #   than hashes, so does this method and therefore this method can return
    #   objects of any type.
    #
    # @param [ String ] str The string to parse.
    #
    # @option options [ nil | :bson ] :mode Which types to emit
    #
    # @return [ Object ] Parsed object tree.
    #
    # @raise [ ArgumentError ] If :mode is neither nil nor :bson.
    # @raise [ Error::ExtJSONParseError ] If the parsed tree is not valid
    #   extended JSON.
    module_function def parse(str, **options)
      parse_obj(::JSON.parse(str), **options)
    end

    # Transforms a Ruby object tree containing extended JSON type hashes
    # into a Ruby object tree with said hashes replaced by BSON or Ruby native
    # types.
    #
    # Pass the tree as an explicit (braced) Hash: a braceless hash literal in
    # the argument list is bound to the keyword options on Ruby 3.
    #
    # @example Convert extended JSON type hashes, keeping BSON types:
    #   BSON::ExtJSON.parse_obj({'foo' => {'$numberLong' => '42'}}, mode: :bson)
    #   # => a Hash mapping "foo" to an Int64 built from 42
    #
    # @example Convert a top-level type hash in the default mode:
    #   BSON::ExtJSON.parse_obj({'$numberLong' => '42'})
    #   # => 42
    #
    # The :mode option selects which types are emitted:
    #
    # - nil (the default): $numberLong inputs produce Integer instances.
    # - :bson: $numberLong inputs produce Int64 instances, so that the type
    #   survives serialization back to extended JSON or to BSON.
    #
    # In both modes:
    #
    # 1. $numberInt inputs produce Integer instances.
    # 2. $regularExpression inputs produce BSON Regexp instances. This may
    #    change in a future version of bson-ruby to produce Ruby Regexp
    #    instances, potentially depending on regular expression options.
    # 3. $numberDecimal inputs produce BSON Decimal128 instances. This may
    #    change in a future version of bson-ruby to produce Ruby BigDecimal
    #    instances instead.
    #
    # This method accepts object trees resulting from parsing canonical
    # extended JSON, relaxed extended JSON and JSON without type information
    # as well as a mix of the above.
    #
    # @note This method accepts any types as input, not just Hash instances.
    #   Consequently, it can return values of any type.
    #
    # @param [ Object ] value The object tree to convert.
    #
    # @option options [ nil | :bson ] :mode Which types to emit
    #
    # @return [ Object ] Converted object tree.
    #
    # @raise [ ArgumentError ] If :mode is neither nil nor :bson.
    # @raise [ Error::ExtJSONParseError ] If the value is of an unsupported
    #   type or contains a malformed extended JSON type hash.
    module_function def parse_obj(value, **options)
      # TODO implement :ruby and :ruby! modes
      unless [nil, :bson].include?(options[:mode])
        raise ArgumentError, "Invalid value for :mode option: #{options[:mode].inspect}"
      end

      case value
      when String, TrueClass, FalseClass, NilClass, Numeric
        # Scalars carry no extended JSON type information; pass them through.
        value
      when Hash
        parse_hash(value, **options)
      when Array
        value.map { |item| parse_obj(item, **options) }
      else
        raise Error::ExtJSONParseError, "Unknown value type: #{value}"
      end
    end

    private

    # Keys that denote an extended JSON type wrapper. A hash using any of
    # them must match one of the known type shapes.
    RESERVED_KEYS = %w(
      $oid $symbol $numberInt $numberLong $numberDouble $numberDecimal
      $binary $code $scope $timestamp $regularExpression $dbPointer
      $date $minKey $maxKey $undefined
    ).freeze

    # Same keys as RESERVED_KEYS, stored as hash keys for constant-time
    # membership tests.
    RESERVED_KEYS_HASH = RESERVED_KEYS.map { |key| [key, true] }.to_h.freeze

    # Converts a single hash, which is either an extended JSON type wrapper
    # (one or two reserved keys), a legacy DBRef, or an ordinary document
    # whose values are converted recursively.
    #
    # @param [ Hash ] hash The hash to convert.
    #
    # @option options [ nil | :bson ] :mode Which types to emit
    #
    # @return [ Object ] The converted value.
    #
    # @raise [ Error::ExtJSONParseError ] If the hash uses reserved keys but
    #   does not match a known type, or a type wrapper holds an invalid value.
    module_function def parse_hash(hash, **options)
      if hash.empty?
        return {}
      end

      if dbref?(hash)
        # Legacy dbref handling.
        # Note that according to extended json spec, only hash values (but
        # not the top-level BSON document itself) may be of type "dbref".
        # This code applies to both hash values and the hash overall; however,
        # since we do not have DBRef as a distinct type, applying the below
        # logic to top level hashes doesn't cause harm.
        hash = hash.dup
        ref = hash.delete('$ref')
        # $id, if present, can be anything
        id = hash.delete('$id')
        if id.is_a?(Hash)
          id = parse_hash(id, **options)
        end
        # Preserve $id value as it was, do not convert either to ObjectId
        # or to a string. But if the value was in {'$oid' => ...} format,
        # the value is converted to an ObjectId instance so that
        # serialization to BSON later on works correctly.
        out = {'$ref' => ref, '$id' => id}
        if hash.key?('$db')
          # $db must always be a string, if provided
          out['$db'] = hash.delete('$db')
        end
        # Whatever is left is converted like an ordinary document.
        return out.update(parse_hash(hash, **options))
      end

      if hash.length == 1
        key, value = hash.first
        return case key
        when '$oid'
          ObjectId.from_string(value)
        when '$symbol'
          Symbol::Raw.new(value)
        when '$numberInt'
          unless value.is_a?(String)
            raise Error::ExtJSONParseError, "$numberInt value is of an incorrect type: #{value}"
          end
          value.to_i
        when '$numberLong'
          unless value.is_a?(String)
            raise Error::ExtJSONParseError, "$numberLong value is of an incorrect type: #{value}"
          end
          value = value.to_i
          if options[:mode] != :bson
            value
          else
            Int64.new(value)
          end
        when '$numberDouble'
          # This handles string to double conversion as well as inf/-inf/nan
          unless value.is_a?(String)
            raise Error::ExtJSONParseError, "Invalid $numberDouble value: #{value}"
          end
          BigDecimal(value).to_f
        when '$numberDecimal'
          # TODO consider returning BigDecimal here instead of Decimal128
          Decimal128.new(value)
        when '$binary'
          unless value.is_a?(Hash)
            raise Error::ExtJSONParseError, "Invalid $binary value: #{value}"
          end
          unless value.keys.sort == %w(base64 subType)
            raise Error::ExtJSONParseError, "Invalid $binary value: #{value}"
          end
          encoded_value = value['base64']
          unless encoded_value.is_a?(String)
            raise Error::ExtJSONParseError, "Invalid base64 value in $binary: #{value}"
          end
          subtype = value['subType']
          unless subtype.is_a?(String)
            raise Error::ExtJSONParseError, "Invalid subType value in $binary: #{value}"
          end
          create_binary(encoded_value, subtype)
        when '$uuid'
          # Check the type first: Regexp#match raises TypeError on
          # non-string input instead of reporting a parse error.
          unless value.is_a?(String) &&
              /\A[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\z/.match(value)
            raise Error::ExtJSONParseError, "Invalid $uuid value: #{value}"
          end
          Binary.from_uuid(value)
        when '$code'
          unless value.is_a?(String)
            raise Error::ExtJSONParseError, "Invalid $code value: #{value}"
          end
          Code.new(value)
        when '$timestamp'
          unless value.is_a?(Hash) && value.keys.sort == %w(i t)
            raise Error::ExtJSONParseError, "Invalid $timestamp value: #{value}"
          end
          t = value['t']
          unless t.is_a?(Integer)
            raise Error::ExtJSONParseError, "Invalid t value: #{value}"
          end
          i = value['i']
          unless i.is_a?(Integer)
            raise Error::ExtJSONParseError, "Invalid i value: #{value}"
          end
          Timestamp.new(t, i)
        when '$regularExpression'
          unless value.is_a?(Hash) && value.keys.sort == %w(options pattern)
            raise Error::ExtJSONParseError, "Invalid $regularExpression value: #{value}"
          end
          # TODO consider returning Ruby regular expression object here
          create_regexp(value['pattern'], value['options'])
        when '$dbPointer'
          unless value.is_a?(Hash) && value.keys.sort == %w($id $ref)
            raise Error::ExtJSONParseError, "Invalid $dbPointer value: #{value}"
          end
          DbPointer.new(value['$ref'], parse_hash(value['$id'], **options))
        when '$date'
          case value
          when String
            ::Time.parse(value).utc
          when Hash
            unless value.keys.sort == %w($numberLong)
              raise Error::ExtJSONParseError, "Invalid value for $date: #{value}"
            end
            # The payload is milliseconds since the epoch; Time.at takes
            # seconds plus microseconds.
            sec, msec = value.values.first.to_i.divmod(1000)
            ::Time.at(sec, msec*1000).utc
          else
            raise Error::ExtJSONParseError, "Invalid value for $date: #{value}"
          end
        when '$minKey'
          unless value == 1
            raise Error::ExtJSONParseError, "Invalid $minKey value: #{value}"
          end
          MinKey.new
        when '$maxKey'
          unless value == 1
            raise Error::ExtJSONParseError, "Invalid $maxKey value: #{value}"
          end
          MaxKey.new
        when '$undefined'
          unless value == true
            raise Error::ExtJSONParseError, "Invalid $undefined value: #{value}"
          end
          Undefined.new
        else
          map_hash(hash, **options)
        end
      end

      if hash.length == 2
        sorted_keys = hash.keys.sort
        first_key = sorted_keys.first
        last_key = sorted_keys.last

        if first_key == '$code'
          unless sorted_keys == %w($code $scope)
            raise Error::ExtJSONParseError, "Invalid $code value: #{hash}"
          end
          unless hash['$code'].is_a?(String)
            raise Error::ExtJSONParseError, "Invalid $code value: #{hash['$code']}"
          end
          unless hash['$scope'].is_a?(Hash)
            raise Error::ExtJSONParseError, "Invalid $scope value: #{hash['$scope']}"
          end

          return CodeWithScope.new(hash['$code'], map_hash(hash['$scope'], **options))
        end

        if first_key == '$binary'
          # Legacy {$binary, $type} form.
          unless sorted_keys == %w($binary $type)
            raise Error::ExtJSONParseError, "Invalid $binary value: #{hash}"
          end
          unless hash['$binary'].is_a?(String)
            raise Error::ExtJSONParseError, "Invalid $binary value: #{hash['$binary']}"
          end
          unless hash['$type'].is_a?(String)
            raise Error::ExtJSONParseError, "Invalid $binary subtype: #{hash['$type']}"
          end

          return create_binary(hash['$binary'], hash['$type'])
        end

        if last_key == '$regex'
          unless sorted_keys == %w($options $regex)
            raise Error::ExtJSONParseError, "Invalid $regex value: #{hash}"
          end

          # A hash-valued $regex is converted on its own and the pair is
          # returned as a plain hash rather than a regular expression.
          if hash['$regex'].is_a?(Hash)
            return {
              '$regex' => parse_hash(hash['$regex'], **options),
              '$options' => hash['$options']
            }
          end

          unless hash['$regex'].is_a?(String)
            raise Error::ExtJSONParseError, "Invalid $regex pattern: #{hash['$regex']}"
          end
          unless hash['$options'].is_a?(String)
            raise Error::ExtJSONParseError, "Invalid $regex options: #{hash['$options']}"
          end

          return create_regexp(hash['$regex'], hash['$options'])
        end
      end

      # Not a recognised type wrapper: treat as an ordinary document, which
      # must not use any reserved keys.
      verify_no_reserved_keys(hash, **options)
    end

    # Ensures the hash uses none of the reserved keys, then converts its
    # values.
    #
    # @param [ Hash ] hash The hash to check and convert.
    #
    # @option options [ nil | :bson ] :mode Which types to emit
    #
    # @return [ Hash ] The hash with converted values.
    #
    # @raise [ Error::ExtJSONParseError ] If the hash uses a reserved key.
    module_function def verify_no_reserved_keys(hash, **options)
      # Iterate over whichever collection is smaller and probe the other one
      # by key lookup.
      uses_reserved_key =
        if hash.length > RESERVED_KEYS.length
          RESERVED_KEYS.any? { |key| hash.key?(key) }
        else
          hash.keys.any? { |key| RESERVED_KEYS_HASH.key?(key) }
        end

      if uses_reserved_key
        raise Error::ExtJSONParseError, "Hash uses reserved keys but does not match a known type: #{hash}"
      end

      map_hash(hash, **options)
    end

    # Builds a new hash with the same keys and each value converted through
    # parse_obj.
    #
    # @param [ Hash ] hash The hash whose values are converted.
    #
    # @option options [ nil | :bson ] :mode Which types to emit
    #
    # @return [ Hash ] The converted hash.
    #
    # @raise [ Error::ExtJSONParseError ] If a String or Symbol key contains
    #   a null byte.
    module_function def map_hash(hash, **options)
      pairs = hash.map do |key, value|
        if (key.is_a?(String) || key.is_a?(Symbol)) && key.to_s.include?(NULL_BYTE)
          raise Error::ExtJSONParseError, "Hash key cannot contain a null byte: #{key}"
        end
        [key, parse_obj(value, **options)]
      end
      pairs.to_h
    end

    # Builds a Binary from base64 data and a hexadecimal subtype string.
    #
    # @param [ String ] encoded_value The base64-encoded data.
    # @param [ String ] encoded_subtype The subtype as a hexadecimal string.
    #
    # @return [ Binary ] The binary value.
    #
    # @raise [ NotImplementedError ] If the subtype is not in Binary::TYPES.
    module_function def create_binary(encoded_value, encoded_subtype)
      subtype = encoded_subtype.hex
      # Binary::TYPES is looked up by the subtype as a single-character string.
      type = Binary::TYPES[subtype.chr]
      unless type
        # Requires https://jira.mongodb.org/browse/RUBY-2056
        raise NotImplementedError, "Binary subtype #{encoded_subtype} is not currently supported"
      end
      Binary.new(Base64.decode64(encoded_value), type)
    end

    # Builds a BSON regular expression from a pattern and an options string.
    #
    # @param [ String ] pattern The regular expression pattern.
    # @param [ String ] options The regular expression options.
    #
    # @return [ Regexp::Raw ] The regular expression.
    module_function def create_regexp(pattern, options)
      Regexp::Raw.new(pattern, options)
    end

    # Tells whether the hash looks like a legacy DBRef: a String $ref, an
    # $id key, and, if $db is present, a String $db.
    #
    # @param [ Hash ] hash The hash to inspect.
    #
    # @return [ true | false ] Whether the hash is DBRef-shaped.
    module_function def dbref?(hash)
      # Check the $db *value*; Hash#key? only reports presence.
      if hash.key?('$db') && !hash['$db'].is_a?(String)
        return false
      end
      hash['$ref'].is_a?(String) && hash.key?('$id')
    end
  end
end