# frozen_string_literal: true

# Copyright (C) 2009-2020 MongoDB Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

require 'base64'

module BSON
  # Represents binary data.
  #
  # @see http://bsonspec.org/#/specification
  #
  # @since 2.0.0
  class Binary
    include JSON

    # A binary is type 0x05 in the BSON spec.
    #
    # @since 2.0.0
    BSON_TYPE = ::String.new(5.chr, encoding: BINARY).freeze

    # The mappings of subtypes to their single byte identifiers.
    #
    # @note subtype 6 (ciphertext) is used for the Client-Side Encryption
    #   feature. Data represented by this subtype is often encrypted, but
    #   may also be plaintext. All instances of this subtype necessary for
    #   Client-Side Encryption will be created internally by the Ruby driver.
    #   An application should not create new BSON::Binary objects of this subtype.
    #
    # @since 2.0.0
    SUBTYPES = {
      generic: 0.chr,
      function: 1.chr,
      old: 2.chr,
      uuid_old: 3.chr,
      uuid: 4.chr,
      md5: 5.chr,
      ciphertext: 6.chr,
      column: 7.chr,
      sensitive: 8.chr,
      user: 128.chr,
    }.freeze

    # The starting point of the user-defined subtype range.
    USER_SUBTYPE = 0x80

    # The mappings of single byte subtypes to their symbol counterparts.
    #
    # @since 2.0.0
    TYPES = SUBTYPES.invert.freeze

    # @return [ String ] The raw binary data.
    #
    # The string is always stored in BINARY encoding.
    #
    # @since 2.0.0
    attr_reader :data

    # @return [ Symbol ] The binary type.
    attr_reader :type

    # @return [ String ] The raw type value, as an encoded integer.
    attr_reader :raw_type

    # Determine if this binary object is equal to another object.
    #
    # Two binaries are equal when both their symbolic type and their data
    # are equal.
    #
    # @example Check the binary equality.
    #   binary == other
    #
    # @param [ Object ] other The object to compare against.
    #
    # @return [ true, false ] If the objects are equal.
    #
    # @since 2.0.0
    def ==(other)
      return false unless other.is_a?(Binary)

      type == other.type && data == other.data
    end
    alias eql? ==

    # Generates an Integer hash value for this object, derived from the
    # same fields that #== compares.
    #
    # Allows using Binary as hash keys.
    #
    # @return [ Integer ]
    #
    # @since 2.3.1
    def hash
      [ data, type ].hash
    end

    # Return a representation of the object for use in
    # application-level JSON serialization. Since BSON::Binary
    # is used exclusively in BSON-related contexts, this
    # method returns the canonical Extended JSON representation.
    #
    # @return [ Hash ] The extended json representation.
    def as_json(*_args)
      as_extended_json
    end

    # Converts this object to a representation directly serializable to
    # Extended JSON (https://github.com/mongodb/specifications/blob/master/source/extended-json.rst).
    #
    # @option opts [ nil | :relaxed | :legacy ] :mode Serialization mode
    #   (default is canonical extended JSON)
    #
    # @return [ Hash ] The extended json representation.
    def as_extended_json(**options)
      # Two lowercase hex digits for the single subtype byte.
      subtype = @raw_type.each_byte.map { |c| c.to_s(16) }.join
      subtype = "0#{subtype}" if subtype.length == 1

      # strict_encode64 never inserts line feeds. encode64 adds one after
      # every 60 output characters, which left newlines embedded in the
      # payload for any data longer than 45 bytes.
      value = Base64.strict_encode64(data)

      if options[:mode] == :legacy
        { '$binary' => value, '$type' => subtype }
      else
        { '$binary' => { 'base64' => value, 'subType' => subtype } }
      end
    end

    # Instantiate the new binary object.
    #
    # This method accepts a string in any encoding; however, if a string is
    # of a non-BINARY encoding, the encoding is set to BINARY. This does not
    # change the bytes of the string but it means that applications referencing
    # the data of a Binary instance cannot assume it is in a non-binary
    # encoding, even if the string given to the constructor was in such an
    # encoding.
    #
    # @example Instantiate a binary.
    #   BSON::Binary.new(data, :md5)
    #
    # @param [ String ] data The raw binary data.
    # @param [ Symbol | String | Integer ] type The binary type.
    #
    # @raise [ BSON::Error::InvalidBinaryType ] If the type is invalid.
    #
    # @since 2.0.0
    def initialize(data = '', type = :generic)
      initialize_instance(data, type)
    end

    # For legacy deserialization support where BSON::Binary objects are
    # expected to have a specific internal representation (with only
    # @type and @data instance variables).
    #
    # @param [ #[] ] coder An object exposing the 'data' and 'type' entries.
    #
    # @api private
    def init_with(coder)
      initialize_instance(coder['data'], coder['type'])
    end

    # Get a nice string for use with object inspection.
    #
    # Only the first eight bytes of the data are shown (as hex) so that
    # inspecting a large binary stays readable.
    #
    # @example Inspect the binary.
    #   binary.inspect
    #
    # @return [ String ] The binary in form
    #   <BSON::Binary:0x<object_id> type=<type> data=0x<hex prefix>...>
    #
    # @since 2.3.0
    def inspect
      "<BSON::Binary:0x#{object_id} type=#{type} data=0x#{data[0, 8].unpack('H*').first}...>"
    end

    # Returns a string representation of the UUID stored in this Binary.
    #
    # If the Binary is of subtype 4 (:uuid), this method returns the UUID
    # in RFC 4122 format. If the representation parameter is provided, it
    # must be the symbol :standard.
    #
    # If the Binary is of subtype 3 (:uuid_old), this method requires that
    # the representation parameter is provided and is one of :csharp_legacy,
    # :java_legacy or :python_legacy. In this case the method assumes the
    # Binary stores the UUID in the specified format, transforms the stored
    # bytes to the standard RFC 4122 representation and returns the UUID in
    # RFC 4122 format.
    #
    # If the Binary is of another subtype, this method raises TypeError.
    #
    # @param [ Symbol ] representation How to interpret the UUID. Strings
    #   are rejected with ArgumentError.
    #
    # @return [ String ] The string representation of the UUID.
    #
    # @raise [ TypeError ] If the subtype of Binary is not :uuid nor :uuid_old.
    # @raise [ ArgumentError ] If the representation is given as a String,
    #   if a representation other than :standard is requested for Binary
    #   subtype 4 (:uuid), if :standard representation is requested for
    #   Binary subtype 3 (:uuid_old), or if an invalid representation is
    #   requested.
    #
    # @api experimental
    def to_uuid(representation = nil)
      if representation.is_a?(String)
        raise ArgumentError,
              "Representation must be given as a symbol: #{representation.inspect}"
      end

      case type
      when :uuid
        from_uuid_to_uuid(representation || :standard)
      when :uuid_old
        from_uuid_old_to_uuid(representation)
      else
        raise TypeError, "The type of Binary must be :uuid or :uuid_old, this object is: #{type.inspect}"
      end
    end

    # Encode the binary type
    #
    # @example Encode the binary.
    #   binary.to_bson
    #
    # @param [ BSON::ByteBuffer ] buffer The buffer to append to.
    #
    # @return [ BSON::ByteBuffer ] The buffer with the encoded object.
    #
    # @see http://bsonspec.org/#/specification
    #
    # @since 2.0.0
    def to_bson(buffer = ByteBuffer.new)
      position = buffer.length
      # Placeholder for the total length, patched once the payload is written.
      buffer.put_int32(0)
      buffer.put_byte(@raw_type)
      # Subtype 2 (:old) carries an additional inner length prefix.
      buffer.put_int32(data.bytesize) if type == :old
      buffer.put_bytes(data)
      # The length excludes the 4-byte length field and the 1-byte subtype.
      buffer.replace_int32(position, buffer.length - position - 5)
    end

    # Deserialize the binary data from BSON.
    #
    # @param [ ByteBuffer ] buffer The byte buffer.
    #
    # @option options [ nil | :bson ] :mode Decoding mode to use.
    #
    # @return [ Binary ] The decoded binary data.
    #
    # @raise [ BSON::Error::UnsupportedBinarySubtype ] If the subtype byte is
    #   below the user-defined range and is not a known subtype.
    #
    # @see http://bsonspec.org/#/specification
    #
    # @since 2.0.0
    def self.from_bson(buffer, **_options)
      length = buffer.get_int32
      type_byte = buffer.get_byte

      if type_byte.bytes.first < USER_SUBTYPE
        type = TYPES[type_byte]

        if type.nil?
          raise Error::UnsupportedBinarySubtype,
                "BSON data contains unsupported binary subtype #{'0x%02x' % type_byte.ord}"
        end
      else
        # User-defined subtypes are passed through as the raw byte so that
        # the exact subtype value is preserved by the constructor.
        type = type_byte
      end

      # Subtype 2 (:old) has an inner length prefix that supersedes the outer one.
      length = buffer.get_int32 if type == :old
      data = buffer.get_bytes(length)
      new(data, type)
    end

    # Creates a BSON::Binary from a string representation of a UUID.
    #
    # The UUID may be given in either 00112233-4455-6677-8899-aabbccddeeff or
    # 00112233445566778899AABBCCDDEEFF format - specifically, any dashes in
    # the UUID are removed and both upper and lower case letters are acceptable.
    #
    # The input UUID string is always interpreted to be in the RFC 4122 format.
    #
    # If representation is not provided, this method creates a BSON::Binary
    # of subtype 4 (:uuid). If representation is provided, it must be one of
    # :standard, :csharp_legacy, :java_legacy or :python_legacy. If
    # representation is :standard, this method creates a subtype 4 (:uuid)
    # binary which is the same behavior as if representation was not provided.
    # For other representations, this method creates a Binary of subtype 3
    # (:uuid_old) with the UUID converted to the appropriate legacy MongoDB
    # UUID storage format.
    #
    # @param [ String ] uuid The string representation of the UUID.
    # @param [ Symbol ] representation How to interpret the UUID.
    #
    # @return [ Binary ] The binary.
    #
    # @raise [ ArgumentError ] If invalid representation is requested.
    #
    # @api experimental
    def self.from_uuid(uuid, representation = nil)
      raise ArgumentError, "Representation must be given as a symbol: #{representation}" if representation.is_a?(String)

      # Turn each pair of hex digits into the byte it denotes.
      uuid_binary = uuid.delete('-').scan(/../).map(&:hex).map(&:chr).join
      representation ||= :standard

      handler = :"from_#{representation}_uuid"
      raise ArgumentError, "Invalid representation: #{representation}" unless respond_to?(handler)

      send(handler, uuid_binary)
    end

    # Constructs a new binary object from a standard-format binary UUID
    # representation.
    #
    # @param [ String ] uuid_binary the UUID data
    #
    # @return [ BSON::Binary ] the Binary object
    #
    # @api private
    def self.from_standard_uuid(uuid_binary)
      new(uuid_binary, :uuid)
    end

    # Constructs a new binary object from a csharp legacy-format binary UUID
    # representation.
    #
    # The first three UUID fields (4, 2 and 2 bytes) are byte-reversed; the
    # trailing 8 bytes are kept as they are. Input that is not exactly
    # 16 bytes long is stored unchanged. The argument is not modified.
    #
    # @param [ String ] uuid_binary the UUID data
    #
    # @return [ BSON::Binary ] the Binary object
    #
    # @api private
    def self.from_csharp_legacy_uuid(uuid_binary)
      # Work on a binary copy so that "." matches single bytes, and use /m so
      # that it also matches a 0x0a (newline) byte, which may legitimately
      # occur inside a UUID.
      swapped = uuid_binary.b.sub(/\A(.)(.)(.)(.)(.)(.)(.)(.)(.{8})\z/m, '\4\3\2\1\6\5\8\7\9')
      new(swapped, :uuid_old)
    end

    # Constructs a new binary object from a java legacy-format binary UUID
    # representation.
    #
    # Each 8-byte half of the UUID is byte-reversed. Input that is not
    # exactly 16 bytes long is stored unchanged. The argument is not modified.
    #
    # @param [ String ] uuid_binary the UUID data
    #
    # @return [ BSON::Binary ] the Binary object
    #
    # @api private
    def self.from_java_legacy_uuid(uuid_binary)
      # See from_csharp_legacy_uuid for why a binary copy and /m are used.
      swapped = uuid_binary.b.sub(/\A(.{8})(.{8})\z/m) do
        ::Regexp.last_match(1).reverse + ::Regexp.last_match(2).reverse
      end
      new(swapped, :uuid_old)
    end

    # Constructs a new binary object from a python legacy-format binary UUID
    # representation. The bytes are stored as given.
    #
    # @param [ String ] uuid_binary the UUID data
    #
    # @return [ BSON::Binary ] the Binary object
    #
    # @api private
    def self.from_python_legacy_uuid(uuid_binary)
      new(uuid_binary, :uuid_old)
    end

    private

    # Initializes an instance of BSON::Binary.
    #
    # @param [ String ] data the data to initialize the object with
    # @param [ Symbol | String | Integer ] type the type to assign the
    #   binary object
    #
    # @raise [ BSON::Error::InvalidBinaryType ] If the type is invalid.
    def initialize_instance(data, type)
      @type = validate_type!(type)

      # The Binary class used to force encoding to BINARY when serializing to
      # BSON. Instead of doing that during serialization, perform this
      # operation during Binary construction to make it clear that once
      # the string is given to the Binary, the data is treated as a binary
      # string and not a text string in any encoding.
      data = data.dup.force_encoding('BINARY') unless data.encoding == Encoding.find('BINARY')

      @data = data
    end

    # Converts the Binary UUID object to a UUID of the given representation.
    # Currently, only :standard representation is supported.
    #
    # @param [ Symbol ] representation The representation to target (must be
    #   :standard)
    #
    # @return [ String ] the UUID as a string
    #
    # @raise [ ArgumentError ] If representation is not :standard.
    def from_uuid_to_uuid(representation)
      if representation != :standard
        raise ArgumentError,
              'Binary of type :uuid can only be stringified to :standard representation, ' \
              "requested: #{representation.inspect}"
      end

      data
        .chars
        .map { |n| '%02x' % n.ord }
        .join
        .sub(/\A(.{8})(.{4})(.{4})(.{4})(.{12})\z/, '\1-\2-\3-\4-\5')
    end

    # Converts the UUID-old object to a UUID of the given representation.
    #
    # @param [ Symbol ] representation The representation to target
    #
    # @return [ String ] the UUID as a string
    #
    # @raise [ ArgumentError ] If representation is nil, unknown, or
    #   :standard.
    def from_uuid_old_to_uuid(representation)
      if representation.nil?
        raise ArgumentError, 'Representation must be specified for BSON::Binary objects of type :uuid_old'
      end

      hex = data.chars.map { |n| '%02x' % n.ord }.join

      # The converters are private, hence include_all = true.
      handler = :"from_uuid_old_to_#{representation}_uuid"
      raise ArgumentError, "Invalid representation: #{representation}" unless respond_to?(handler, true)

      send(handler, hex)
        .sub(/\A(.{8})(.{4})(.{4})(.{4})(.{12})\z/, '\1-\2-\3-\4-\5')
    end

    # Tries to convert a UUID-old object to a standard representation, which is
    # not supported.
    #
    # @param [ String ] _hex The hexadecimal string to convert (unused)
    #
    # @raise [ ArgumentError ] because standard representation is not supported
    def from_uuid_old_to_standard_uuid(_hex)
      raise ArgumentError, 'BSON::Binary objects of type :uuid_old cannot be stringified to :standard representation'
    end

    # Converts a UUID-old object to a csharp-legacy representation.
    #
    # @param [ String ] hex The hexadecimal string to convert
    #
    # @return [ String ] the csharp-legacy-formatted UUID
    def from_uuid_old_to_csharp_legacy_uuid(hex)
      hex.sub(/\A(..)(..)(..)(..)(..)(..)(..)(..)(.{16})\z/, '\4\3\2\1\6\5\8\7\9')
    end

    # Converts a UUID-old object to a java-legacy representation.
    #
    # @param [ String ] hex The hexadecimal string to convert
    #
    # @return [ String ] the java-legacy-formatted UUID
    def from_uuid_old_to_java_legacy_uuid(hex)
      hex.sub(/\A(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)\z/) do
        (::Regexp.last_match[1..8].reverse + ::Regexp.last_match[9..16].reverse).join
      end
    end

    # Converts a UUID-old object to a python-legacy representation.
    #
    # @param [ String ] hex The hexadecimal string to convert
    #
    # @return [ String ] the python-legacy-formatted UUID
    def from_uuid_old_to_python_legacy_uuid(hex)
      hex
    end

    # Validate the provided type is a valid type.
    #
    # Integers are treated as raw subtype values. Strings longer than one
    # character are treated as subtype names; one-character strings are
    # treated as a raw subtype byte. As a side effect, @raw_type is set.
    #
    # @api private
    #
    # @example Validate the type.
    #   binary.validate_type!(:user)
    #
    # @param [ Symbol | String | Integer ] type The provided type.
    #
    # @return [ Symbol ] the symbolic type corresponding to the argument.
    #
    # @raise [ BSON::Error::InvalidBinaryType ] If the type is invalid.
    #
    # @since 2.0.0
    def validate_type!(type)
      case type
      when Integer then validate_integer_type!(type)
      when String
        # An empty string has no byte to interpret as a subtype.
        raise BSON::Error::InvalidBinaryType, type if type.empty?

        if type.length > 1
          validate_symbol_type!(type.to_sym)
        else
          validate_integer_type!(type.bytes.first)
        end
      when Symbol then validate_symbol_type!(type)
      else raise BSON::Error::InvalidBinaryType, type
      end
    end

    # Test that the given integer type is valid.
    #
    # @param [ Integer ] type the provided type
    #
    # @return [ Symbol ] the symbolic type corresponding to the argument.
    #
    # @raise [ BSON::Error::InvalidBinaryType] if the type is invalid.
    def validate_integer_type!(type)
      # A subtype occupies a single byte. Reject anything else with the
      # documented error rather than letting Integer#chr raise RangeError.
      raise BSON::Error::InvalidBinaryType, type unless type.between?(0, 0xFF)

      @raw_type = type.chr.force_encoding('BINARY').freeze

      if type < USER_SUBTYPE
        raise BSON::Error::InvalidBinaryType, type unless TYPES.key?(@raw_type)

        return TYPES[@raw_type]
      end

      # Every value in the user-defined range maps to the :user symbol.
      :user
    end

    # Test that the given symbol type is valid.
    #
    # @param [ Symbol ] type the provided type
    #
    # @return [ Symbol ] the symbolic type corresponding to the argument.
    #
    # @raise [ BSON::Error::InvalidBinaryType] if the type is invalid.
    def validate_symbol_type!(type)
      raise BSON::Error::InvalidBinaryType, type unless SUBTYPES.key?(type)

      @raw_type = SUBTYPES[type]
      type
    end

    # Register this type when the module is loaded.
    #
    # @since 2.0.0
    Registry.register(BSON_TYPE, self)
  end
end