lib/mongo/util/bson.rb in mongo-0.1.0 vs lib/mongo/util/bson.rb in mongo-0.15

- old
+ new

@@ -19,15 +19,16 @@ require 'mongo/util/ordered_hash' require 'mongo/types/binary' require 'mongo/types/dbref' require 'mongo/types/objectid' require 'mongo/types/regexp_of_holding' -require 'mongo/types/undefined' # A BSON seralizer/deserializer. class BSON + include Mongo + MINKEY = -1 EOO = 0 NUMBER = 1 STRING = 2 OBJECT = 3 @@ -42,145 +43,194 @@ REF = 12 CODE = 13 SYMBOL = 14 CODE_W_SCOPE = 15 NUMBER_INT = 16 + TIMESTAMP = 17 + NUMBER_LONG = 18 MAXKEY = 127 if RUBY_VERSION >= '1.9' def self.to_utf8(str) str.encode("utf-8") end else def self.to_utf8(str) - str # TODO punt for now + str # TODO Ruby 1.8 punt for now end end def self.serialize_cstr(buf, val) buf.put_array(to_utf8(val.to_s).unpack("C*") + [0]) end - def initialize(db=nil) - # db is only needed during deserialization when the data contains a DBRef - @db = db + def initialize() @buf = ByteBuffer.new end def to_a @buf.to_a end - def serialize(obj) - raise "Document is null" unless obj + begin + require 'mongo_ext/cbson' + def serialize(obj, check_keys=false) + @buf = ByteBuffer.new(CBson.serialize(obj, check_keys)) + end + rescue LoadError + def serialize(obj, check_keys=false) + raise "Document is null" unless obj - @buf.rewind - # put in a placeholder for the total size - @buf.put_int(0) + @buf.rewind + # put in a placeholder for the total size + @buf.put_int(0) - obj.each {|k, v| - type = bson_type(v, k) - case type - when STRING, CODE, SYMBOL - serialize_string_element(@buf, k, v, type) - when NUMBER, NUMBER_INT - serialize_number_element(@buf, k, v, type) - when OBJECT - serialize_object_element(@buf, k, v) - when OID - serialize_oid_element(@buf, k, v) - when ARRAY - serialize_array_element(@buf, k, v) - when REGEX - serialize_regex_element(@buf, k, v) - when BOOLEAN - serialize_boolean_element(@buf, k, v) - when DATE - serialize_date_element(@buf, k, v) - when NULL - serialize_null_element(@buf, k) - when REF - serialize_dbref_element(@buf, k, v) - when BINARY - serialize_binary_element(@buf, k, v) - when UNDEFINED - serialize_undefined_element(@buf, k) - when CODE_W_SCOPE - # TODO - raise "unimplemented type #{type}" - else - raise "unhandled type #{type}" + # Write key/value pairs. Always write _id first if it exists. + if obj.has_key? '_id' + serialize_key_value('_id', obj['_id'], check_keys) + elsif obj.has_key? :_id + serialize_key_value('_id', obj[:_id], check_keys) end - } - serialize_eoo_element(@buf) - @buf.put_int(@buf.size, 0) - self + + obj.each {|k, v| serialize_key_value(k, v, check_keys) unless k == '_id' || k == :_id } + + serialize_eoo_element(@buf) + @buf.put_int(@buf.size, 0) + self + end end - def deserialize(buf=nil, parent=nil) - # If buf is nil, use @buf, assumed to contain already-serialized BSON. - # This is only true during testing. - @buf = ByteBuffer.new(buf.to_a) if buf - @buf.rewind - @buf.get_int # eat message size - doc = OrderedHash.new - while @buf.more? - type = @buf.get - case type - when STRING, CODE - key = deserialize_cstr(@buf) - doc[key] = deserialize_string_data(@buf) - when SYMBOL - key = deserialize_cstr(@buf) - doc[key] = deserialize_string_data(@buf).intern - when NUMBER - key = deserialize_cstr(@buf) - doc[key] = deserialize_number_data(@buf) - when NUMBER_INT - key = deserialize_cstr(@buf) - doc[key] = deserialize_number_int_data(@buf) - when OID - key = deserialize_cstr(@buf) - doc[key] = deserialize_oid_data(@buf) - when ARRAY - key = deserialize_cstr(@buf) - doc[key] = deserialize_array_data(@buf, doc) - when REGEX - key = deserialize_cstr(@buf) - doc[key] = deserialize_regex_data(@buf) - when OBJECT - key = deserialize_cstr(@buf) - doc[key] = deserialize_object_data(@buf, doc) - when BOOLEAN - key = deserialize_cstr(@buf) - doc[key] = deserialize_boolean_data(@buf) - when DATE - key = deserialize_cstr(@buf) - doc[key] = deserialize_date_data(@buf) - when NULL - key = deserialize_cstr(@buf) - doc[key] = nil - when UNDEFINED - key = deserialize_cstr(@buf) - doc[key] = XGen::Mongo::Driver::Undefined.new - when REF - key = deserialize_cstr(@buf) - doc[key] = deserialize_dbref_data(@buf, key, parent) - when BINARY - key = deserialize_cstr(@buf) - doc[key] = deserialize_binary_data(@buf) - when CODE_W_SCOPE - # TODO - raise "unimplemented type #{type}" - when EOO - break + def serialize_key_value(k, v, check_keys) + k = k.to_s + if check_keys + if k[0] == ?$ + raise InvalidName.new("key #{k} must not start with '$'") + end + if k.include? ?. + raise InvalidName.new("key #{k} must not contain '.'") + end + end + type = bson_type(v) + case type + when STRING, SYMBOL + serialize_string_element(@buf, k, v, type) + when NUMBER, NUMBER_INT + serialize_number_element(@buf, k, v, type) + when OBJECT + serialize_object_element(@buf, k, v, check_keys) + when OID + serialize_oid_element(@buf, k, v) + when ARRAY + serialize_array_element(@buf, k, v, check_keys) + when REGEX + serialize_regex_element(@buf, k, v) + when BOOLEAN + serialize_boolean_element(@buf, k, v) + when DATE + serialize_date_element(@buf, k, v) + when NULL + serialize_null_element(@buf, k) + when REF + serialize_dbref_element(@buf, k, v) + when BINARY + serialize_binary_element(@buf, k, v) + when UNDEFINED + serialize_null_element(@buf, k) + when CODE_W_SCOPE + serialize_code_w_scope(@buf, k, v) + else + raise "unhandled type #{type}" + end + end + + begin + require 'mongo_ext/cbson' + def deserialize(buf=nil) + if buf.is_a? String + @buf = ByteBuffer.new(buf) if buf else - raise "Unknown type #{type}, key = #{key}" + @buf = ByteBuffer.new(buf.to_a) if buf end + @buf.rewind + CBson.deserialize(@buf.to_s) end - @buf.rewind - doc + rescue LoadError + def deserialize(buf=nil) + # If buf is nil, use @buf, assumed to contain already-serialized BSON. + # This is only true during testing. + if buf.is_a? String + @buf = ByteBuffer.new(buf) if buf + else + @buf = ByteBuffer.new(buf.to_a) if buf + end + @buf.rewind + @buf.get_int # eat message size + doc = OrderedHash.new + while @buf.more? + type = @buf.get + case type + when STRING, CODE + key = deserialize_cstr(@buf) + doc[key] = deserialize_string_data(@buf) + when SYMBOL + key = deserialize_cstr(@buf) + doc[key] = deserialize_string_data(@buf).intern + when NUMBER + key = deserialize_cstr(@buf) + doc[key] = deserialize_number_data(@buf) + when NUMBER_INT + key = deserialize_cstr(@buf) + doc[key] = deserialize_number_int_data(@buf) + when NUMBER_LONG + key = deserialize_cstr(@buf) + doc[key] = deserialize_number_long_data(@buf) + when OID + key = deserialize_cstr(@buf) + doc[key] = deserialize_oid_data(@buf) + when ARRAY + key = deserialize_cstr(@buf) + doc[key] = deserialize_array_data(@buf) + when REGEX + key = deserialize_cstr(@buf) + doc[key] = deserialize_regex_data(@buf) + when OBJECT + key = deserialize_cstr(@buf) + doc[key] = deserialize_object_data(@buf) + when BOOLEAN + key = deserialize_cstr(@buf) + doc[key] = deserialize_boolean_data(@buf) + when DATE + key = deserialize_cstr(@buf) + doc[key] = deserialize_date_data(@buf) + when NULL + key = deserialize_cstr(@buf) + doc[key] = nil + when UNDEFINED + key = deserialize_cstr(@buf) + doc[key] = nil + when REF + key = deserialize_cstr(@buf) + doc[key] = deserialize_dbref_data(@buf) + when BINARY + key = deserialize_cstr(@buf) + doc[key] = deserialize_binary_data(@buf) + when CODE_W_SCOPE + key = deserialize_cstr(@buf) + doc[key] = deserialize_code_w_scope_data(@buf) + when TIMESTAMP + key = deserialize_cstr(@buf) + doc[key] = [deserialize_number_int_data(@buf), + deserialize_number_int_data(@buf)] + when EOO + break + else + raise "Unknown type #{type}, key = #{key}" + end + end + @buf.rewind + doc + end end # For debugging. def hex_dump str = '' @@ -195,12 +245,14 @@ } str end def deserialize_date_data(buf) - millisecs = buf.get_long() - Time.at(millisecs.to_f / 1000.0) # at() takes fractional seconds + unsigned = buf.get_long() + # see note for deserialize_number_long_data below + milliseconds = unsigned >= 2 ** 64 / 2 ? unsigned - 2**64 : unsigned + Time.at(milliseconds.to_f / 1000.0).utc # at() takes fractional seconds end def deserialize_boolean_data(buf) buf.get == 1 end @@ -208,21 +260,35 @@ def deserialize_number_data(buf) buf.get_double end def deserialize_number_int_data(buf) - buf.get_int + # sometimes ruby makes me angry... why would the same code pack as signed + # but unpack as unsigned + unsigned = buf.get_int + unsigned >= 2**32 / 2 ? unsigned - 2**32 : unsigned end - def deserialize_object_data(buf, parent) + def deserialize_number_long_data(buf) + # same note as above applies here... + unsigned = buf.get_long + unsigned >= 2 ** 64 / 2 ? unsigned - 2**64 : unsigned + end + + def deserialize_object_data(buf) size = buf.get_int buf.position -= 4 - BSON.new(@db).deserialize(buf.get(size), parent) + object = BSON.new().deserialize(buf.get(size)) + if object.has_key? "$ref" + DBRef.new(object["$ref"], object["$id"]) + else + object + end end - def deserialize_array_data(buf, parent) - h = deserialize_object_data(buf, parent) + def deserialize_array_data(buf) + h = deserialize_object_data(buf) a = [] h.each { |k, v| a[k.to_i] = v } a end @@ -232,39 +298,59 @@ options = 0 options |= Regexp::IGNORECASE if options_str.include?('i') options |= Regexp::MULTILINE if options_str.include?('m') options |= Regexp::EXTENDED if options_str.include?('x') options_str.gsub!(/[imx]/, '') # Now remove the three we understand - XGen::Mongo::Driver::RegexpOfHolding.new(str, options, options_str) + RegexpOfHolding.new(str, options, options_str) end def deserialize_string_data(buf) len = buf.get_int bytes = buf.get(len) - str = bytes[0..-2].pack("C*") + str = bytes[0..-2] + if str.respond_to? "pack" + str = str.pack("C*") + end if RUBY_VERSION >= '1.9' str.force_encoding("utf-8") end str end + def deserialize_code_w_scope_data(buf) + buf.get_int + len = buf.get_int + code = buf.get(len)[0..-2] + if code.respond_to? "pack" + code = code.pack("C*") + end + if RUBY_VERSION >= '1.9' + code.force_encoding("utf-8") + end + + scope_size = buf.get_int + buf.position -= 4 + scope = BSON.new().deserialize(buf.get(scope_size)) + + Code.new(code, scope) + end + def deserialize_oid_data(buf) - XGen::Mongo::Driver::ObjectID.new(buf.get(12)) + ObjectID.new(buf.get(12)) end - def deserialize_dbref_data(buf, key, parent) - ns = deserialize_cstr(buf) + def deserialize_dbref_data(buf) + ns = deserialize_string_data(buf) oid = deserialize_oid_data(buf) - XGen::Mongo::Driver::DBRef.new(parent, key, @db, ns, oid) + DBRef.new(ns, oid) end def deserialize_binary_data(buf) len = buf.get_int - bytes = buf.get(len) - str = '' - bytes.each { |c| str << c.chr } - str.to_mongo_binary + type = buf.get + len = buf.get_int if type == Binary::SUBTYPE_BYTES + Binary.new(buf.get(len), type) end def serialize_eoo_element(buf) buf.put(EOO) end @@ -273,33 +359,33 @@ buf.put(NULL) self.class.serialize_cstr(buf, key) end def serialize_dbref_element(buf, key, val) - buf.put(REF) - self.class.serialize_cstr(buf, key) - self.class.serialize_cstr(buf, val.namespace) - buf.put_array(val.object_id.to_a) + oh = OrderedHash.new + oh['$ref'] = val.namespace + oh['$id'] = val.object_id + serialize_object_element(buf, key, oh, false) end def serialize_binary_element(buf, key, val) buf.put(BINARY) self.class.serialize_cstr(buf, key) - buf.put_int(val.length) - bytes = if RUBY_VERSION >= '1.9' - val.bytes.to_a - else - a = [] - val.each_byte { |byte| a << byte } - a - end - buf.put_array(bytes) - end - def serialize_undefined_element(buf, key) - buf.put(UNDEFINED) - self.class.serialize_cstr(buf, key) + bytes = val.to_a + num_bytes = bytes.length + subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES + if subtype == Binary::SUBTYPE_BYTES + buf.put_int(num_bytes + 4) + buf.put(subtype) + buf.put_int(num_bytes) + buf.put_array(bytes) + else + buf.put_int(num_bytes) + buf.put(subtype) + buf.put_array(bytes) + end end def serialize_boolean_element(buf, key, val) buf.put(BOOLEAN) self.class.serialize_cstr(buf, key) @@ -312,31 +398,42 @@ millisecs = (val.to_f * 1000).to_i buf.put_long(millisecs) end def serialize_number_element(buf, key, val, type) - buf.put(type) - self.class.serialize_cstr(buf, key) if type == NUMBER + buf.put(type) + self.class.serialize_cstr(buf, key) buf.put_double(val) else - buf.put_int(val) + if val > 2**64 / 2 - 1 or val < -2**64 / 2 + raise RangeError.new("MongoDB can only handle 8-byte ints") + end + if val > 2**32 / 2 - 1 or val < -2**32 / 2 + buf.put(NUMBER_LONG) + self.class.serialize_cstr(buf, key) + buf.put_long(val) + else + buf.put(type) + self.class.serialize_cstr(buf, key) + buf.put_int(val) + end end end - def serialize_object_element(buf, key, val, opcode=OBJECT) + def serialize_object_element(buf, key, val, check_keys, opcode=OBJECT) buf.put(opcode) self.class.serialize_cstr(buf, key) - buf.put_array(BSON.new.serialize(val).to_a) + buf.put_array(BSON.new.serialize(val, check_keys).to_a) end - def serialize_array_element(buf, key, val) + def serialize_array_element(buf, key, val, check_keys) # Turn array into hash with integer indices as keys h = OrderedHash.new i = 0 val.each { |v| h[i] = v; i += 1 } - serialize_object_element(buf, key, h, ARRAY) + serialize_object_element(buf, key, h, check_keys, ARRAY) end def serialize_regex_element(buf, key, val) buf.put(REGEX) self.class.serialize_cstr(buf, key) @@ -379,53 +476,69 @@ # Go back to where we were buf.position = end_pos end + def serialize_code_w_scope(buf, key, val) + buf.put(CODE_W_SCOPE) + self.class.serialize_cstr(buf, key) + + # Make a hole for the length + len_pos = buf.position + buf.put_int(0) + + buf.put_int(val.length + 1) + self.class.serialize_cstr(buf, val) + buf.put_array(BSON.new.serialize(val.scope).to_a) + + end_pos = buf.position + buf.put_int(end_pos - len_pos, len_pos) + buf.position = end_pos + end + def deserialize_cstr(buf) chars = "" - while 1 + while true b = buf.get break if b == 0 chars << b.chr end if RUBY_VERSION >= '1.9' chars.force_encoding("utf-8") # Mongo stores UTF-8 end chars end - def bson_type(o, key) + def bson_type(o) case o when nil NULL when Integer NUMBER_INT when Numeric NUMBER - when XGen::Mongo::Driver::Binary # must be before String + when ByteBuffer BINARY + when Code + CODE_W_SCOPE when String - # magic awful stuff - the DB requires that a where clause is sent as CODE - key == "$where" ? CODE : STRING + STRING when Array ARRAY when Regexp REGEX - when XGen::Mongo::Driver::ObjectID + when ObjectID OID - when XGen::Mongo::Driver::DBRef + when DBRef REF when true, false BOOLEAN when Time DATE when Hash OBJECT when Symbol SYMBOL - when XGen::Mongo::Driver::Undefined - UNDEFINED else raise "Unknown type of object: #{o.class.name}" end end