lib/mongo/util/bson.rb in mongo-0.1.0 vs lib/mongo/util/bson.rb in mongo-0.15
- old
+ new
@@ -19,15 +19,16 @@
require 'mongo/util/ordered_hash'
require 'mongo/types/binary'
require 'mongo/types/dbref'
require 'mongo/types/objectid'
require 'mongo/types/regexp_of_holding'
-require 'mongo/types/undefined'
# A BSON serializer/deserializer.
class BSON
+ include Mongo
+
MINKEY = -1
EOO = 0
NUMBER = 1
STRING = 2
OBJECT = 3
@@ -42,145 +43,194 @@
REF = 12
CODE = 13
SYMBOL = 14
CODE_W_SCOPE = 15
NUMBER_INT = 16
+ TIMESTAMP = 17
+ NUMBER_LONG = 18
MAXKEY = 127
if RUBY_VERSION >= '1.9'
def self.to_utf8(str)
str.encode("utf-8")
end
else
def self.to_utf8(str)
- str # TODO punt for now
+ str # TODO Ruby 1.8 punt for now
end
end
# Writes +val+ into +buf+ as a NUL-terminated C string.
#
# +val+ is stringified with #to_s, passed through to_utf8, and split into
# its unsigned byte values; a single trailing 0 byte terminates the string.
# The resulting byte array is handed to buf.put_array in one call.
def self.serialize_cstr(buf, val)
  cstr_bytes = to_utf8(val.to_s).unpack("C*")
  buf.put_array(cstr_bytes + [0])
end
- def initialize(db=nil)
- # db is only needed during deserialization when the data contains a DBRef
- @db = db
+ def initialize()
@buf = ByteBuffer.new
end
# Returns the internal buffer (@buf) converted with its own #to_a.
def to_a
  buffer = @buf
  buffer.to_a
end
- def serialize(obj)
- raise "Document is null" unless obj
+ begin
+ require 'mongo_ext/cbson'
+ def serialize(obj, check_keys=false)
+ @buf = ByteBuffer.new(CBson.serialize(obj, check_keys))
+ end
+ rescue LoadError
+ def serialize(obj, check_keys=false)
+ raise "Document is null" unless obj
- @buf.rewind
- # put in a placeholder for the total size
- @buf.put_int(0)
+ @buf.rewind
+ # put in a placeholder for the total size
+ @buf.put_int(0)
- obj.each {|k, v|
- type = bson_type(v, k)
- case type
- when STRING, CODE, SYMBOL
- serialize_string_element(@buf, k, v, type)
- when NUMBER, NUMBER_INT
- serialize_number_element(@buf, k, v, type)
- when OBJECT
- serialize_object_element(@buf, k, v)
- when OID
- serialize_oid_element(@buf, k, v)
- when ARRAY
- serialize_array_element(@buf, k, v)
- when REGEX
- serialize_regex_element(@buf, k, v)
- when BOOLEAN
- serialize_boolean_element(@buf, k, v)
- when DATE
- serialize_date_element(@buf, k, v)
- when NULL
- serialize_null_element(@buf, k)
- when REF
- serialize_dbref_element(@buf, k, v)
- when BINARY
- serialize_binary_element(@buf, k, v)
- when UNDEFINED
- serialize_undefined_element(@buf, k)
- when CODE_W_SCOPE
- # TODO
- raise "unimplemented type #{type}"
- else
- raise "unhandled type #{type}"
+ # Write key/value pairs. Always write _id first if it exists.
+ if obj.has_key? '_id'
+ serialize_key_value('_id', obj['_id'], check_keys)
+ elsif obj.has_key? :_id
+ serialize_key_value('_id', obj[:_id], check_keys)
end
- }
- serialize_eoo_element(@buf)
- @buf.put_int(@buf.size, 0)
- self
+
+ obj.each {|k, v| serialize_key_value(k, v, check_keys) unless k == '_id' || k == :_id }
+
+ serialize_eoo_element(@buf)
+ @buf.put_int(@buf.size, 0)
+ self
+ end
end
- def deserialize(buf=nil, parent=nil)
- # If buf is nil, use @buf, assumed to contain already-serialized BSON.
- # This is only true during testing.
- @buf = ByteBuffer.new(buf.to_a) if buf
- @buf.rewind
- @buf.get_int # eat message size
- doc = OrderedHash.new
- while @buf.more?
- type = @buf.get
- case type
- when STRING, CODE
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_string_data(@buf)
- when SYMBOL
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_string_data(@buf).intern
- when NUMBER
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_number_data(@buf)
- when NUMBER_INT
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_number_int_data(@buf)
- when OID
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_oid_data(@buf)
- when ARRAY
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_array_data(@buf, doc)
- when REGEX
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_regex_data(@buf)
- when OBJECT
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_object_data(@buf, doc)
- when BOOLEAN
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_boolean_data(@buf)
- when DATE
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_date_data(@buf)
- when NULL
- key = deserialize_cstr(@buf)
- doc[key] = nil
- when UNDEFINED
- key = deserialize_cstr(@buf)
- doc[key] = XGen::Mongo::Driver::Undefined.new
- when REF
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_dbref_data(@buf, key, parent)
- when BINARY
- key = deserialize_cstr(@buf)
- doc[key] = deserialize_binary_data(@buf)
- when CODE_W_SCOPE
- # TODO
- raise "unimplemented type #{type}"
- when EOO
- break
+ def serialize_key_value(k, v, check_keys)
+ k = k.to_s
+ if check_keys
+ if k[0] == ?$
+ raise InvalidName.new("key #{k} must not start with '$'")
+ end
+ if k.include? ?.
+ raise InvalidName.new("key #{k} must not contain '.'")
+ end
+ end
+ type = bson_type(v)
+ case type
+ when STRING, SYMBOL
+ serialize_string_element(@buf, k, v, type)
+ when NUMBER, NUMBER_INT
+ serialize_number_element(@buf, k, v, type)
+ when OBJECT
+ serialize_object_element(@buf, k, v, check_keys)
+ when OID
+ serialize_oid_element(@buf, k, v)
+ when ARRAY
+ serialize_array_element(@buf, k, v, check_keys)
+ when REGEX
+ serialize_regex_element(@buf, k, v)
+ when BOOLEAN
+ serialize_boolean_element(@buf, k, v)
+ when DATE
+ serialize_date_element(@buf, k, v)
+ when NULL
+ serialize_null_element(@buf, k)
+ when REF
+ serialize_dbref_element(@buf, k, v)
+ when BINARY
+ serialize_binary_element(@buf, k, v)
+ when UNDEFINED
+ serialize_null_element(@buf, k)
+ when CODE_W_SCOPE
+ serialize_code_w_scope(@buf, k, v)
+ else
+ raise "unhandled type #{type}"
+ end
+ end
+
+ begin
+ require 'mongo_ext/cbson'
+ def deserialize(buf=nil)
+ if buf.is_a? String
+ @buf = ByteBuffer.new(buf) if buf
else
- raise "Unknown type #{type}, key = #{key}"
+ @buf = ByteBuffer.new(buf.to_a) if buf
end
+ @buf.rewind
+ CBson.deserialize(@buf.to_s)
end
- @buf.rewind
- doc
+ rescue LoadError
+ def deserialize(buf=nil)
+ # If buf is nil, use @buf, assumed to contain already-serialized BSON.
+ # This is only true during testing.
+ if buf.is_a? String
+ @buf = ByteBuffer.new(buf) if buf
+ else
+ @buf = ByteBuffer.new(buf.to_a) if buf
+ end
+ @buf.rewind
+ @buf.get_int # eat message size
+ doc = OrderedHash.new
+ while @buf.more?
+ type = @buf.get
+ case type
+ when STRING, CODE
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_string_data(@buf)
+ when SYMBOL
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_string_data(@buf).intern
+ when NUMBER
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_number_data(@buf)
+ when NUMBER_INT
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_number_int_data(@buf)
+ when NUMBER_LONG
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_number_long_data(@buf)
+ when OID
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_oid_data(@buf)
+ when ARRAY
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_array_data(@buf)
+ when REGEX
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_regex_data(@buf)
+ when OBJECT
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_object_data(@buf)
+ when BOOLEAN
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_boolean_data(@buf)
+ when DATE
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_date_data(@buf)
+ when NULL
+ key = deserialize_cstr(@buf)
+ doc[key] = nil
+ when UNDEFINED
+ key = deserialize_cstr(@buf)
+ doc[key] = nil
+ when REF
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_dbref_data(@buf)
+ when BINARY
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_binary_data(@buf)
+ when CODE_W_SCOPE
+ key = deserialize_cstr(@buf)
+ doc[key] = deserialize_code_w_scope_data(@buf)
+ when TIMESTAMP
+ key = deserialize_cstr(@buf)
+ doc[key] = [deserialize_number_int_data(@buf),
+ deserialize_number_int_data(@buf)]
+ when EOO
+ break
+ else
+ raise "Unknown type #{type}, key = #{key}"
+ end
+ end
+ @buf.rewind
+ doc
+ end
end
# For debugging.
def hex_dump
str = ''
@@ -195,12 +245,14 @@
}
str
end
def deserialize_date_data(buf)
- millisecs = buf.get_long()
- Time.at(millisecs.to_f / 1000.0) # at() takes fractional seconds
+ unsigned = buf.get_long()
+ # see note for deserialize_number_long_data below
+ milliseconds = unsigned >= 2 ** 64 / 2 ? unsigned - 2**64 : unsigned
+ Time.at(milliseconds.to_f / 1000.0).utc # at() takes fractional seconds
end
# Reads one value from +buf+ via #get and reports whether it equals 1.
# Any other value (including 0) yields false.
def deserialize_boolean_data(buf)
  flag = buf.get
  flag == 1
end
@@ -208,21 +260,35 @@
# Reads the next value from +buf+ with #get_double and returns it unchanged.
def deserialize_number_data(buf)
  double = buf.get_double
  double
end
def deserialize_number_int_data(buf)
- buf.get_int
+ # sometimes ruby makes me angry... why would the same code pack as signed
+ # but unpack as unsigned
+ unsigned = buf.get_int
+ unsigned >= 2**32 / 2 ? unsigned - 2**32 : unsigned
end
- def deserialize_object_data(buf, parent)
+ def deserialize_number_long_data(buf)
+ # same note as above applies here...
+ unsigned = buf.get_long
+ unsigned >= 2 ** 64 / 2 ? unsigned - 2**64 : unsigned
+ end
+
+ def deserialize_object_data(buf)
size = buf.get_int
buf.position -= 4
- BSON.new(@db).deserialize(buf.get(size), parent)
+ object = BSON.new().deserialize(buf.get(size))
+ if object.has_key? "$ref"
+ DBRef.new(object["$ref"], object["$id"])
+ else
+ object
+ end
end
- def deserialize_array_data(buf, parent)
- h = deserialize_object_data(buf, parent)
+ def deserialize_array_data(buf)
+ h = deserialize_object_data(buf)
a = []
h.each { |k, v| a[k.to_i] = v }
a
end
@@ -232,39 +298,59 @@
options = 0
options |= Regexp::IGNORECASE if options_str.include?('i')
options |= Regexp::MULTILINE if options_str.include?('m')
options |= Regexp::EXTENDED if options_str.include?('x')
options_str.gsub!(/[imx]/, '') # Now remove the three we understand
- XGen::Mongo::Driver::RegexpOfHolding.new(str, options, options_str)
+ RegexpOfHolding.new(str, options, options_str)
end
def deserialize_string_data(buf)
len = buf.get_int
bytes = buf.get(len)
- str = bytes[0..-2].pack("C*")
+ str = bytes[0..-2]
+ if str.respond_to? "pack"
+ str = str.pack("C*")
+ end
if RUBY_VERSION >= '1.9'
str.force_encoding("utf-8")
end
str
end
+ def deserialize_code_w_scope_data(buf)
+ buf.get_int
+ len = buf.get_int
+ code = buf.get(len)[0..-2]
+ if code.respond_to? "pack"
+ code = code.pack("C*")
+ end
+ if RUBY_VERSION >= '1.9'
+ code.force_encoding("utf-8")
+ end
+
+ scope_size = buf.get_int
+ buf.position -= 4
+ scope = BSON.new().deserialize(buf.get(scope_size))
+
+ Code.new(code, scope)
+ end
+
def deserialize_oid_data(buf)
- XGen::Mongo::Driver::ObjectID.new(buf.get(12))
+ ObjectID.new(buf.get(12))
end
- def deserialize_dbref_data(buf, key, parent)
- ns = deserialize_cstr(buf)
+ def deserialize_dbref_data(buf)
+ ns = deserialize_string_data(buf)
oid = deserialize_oid_data(buf)
- XGen::Mongo::Driver::DBRef.new(parent, key, @db, ns, oid)
+ DBRef.new(ns, oid)
end
def deserialize_binary_data(buf)
len = buf.get_int
- bytes = buf.get(len)
- str = ''
- bytes.each { |c| str << c.chr }
- str.to_mongo_binary
+ type = buf.get
+ len = buf.get_int if type == Binary::SUBTYPE_BYTES
+ Binary.new(buf.get(len), type)
end
# Appends the EOO marker to +buf+ with a single #put call and returns
# whatever that call returns.
def serialize_eoo_element(buf)
  terminator = EOO
  buf.put(terminator)
end
@@ -273,33 +359,33 @@
buf.put(NULL)
self.class.serialize_cstr(buf, key)
end
def serialize_dbref_element(buf, key, val)
- buf.put(REF)
- self.class.serialize_cstr(buf, key)
- self.class.serialize_cstr(buf, val.namespace)
- buf.put_array(val.object_id.to_a)
+ oh = OrderedHash.new
+ oh['$ref'] = val.namespace
+ oh['$id'] = val.object_id
+ serialize_object_element(buf, key, oh, false)
end
def serialize_binary_element(buf, key, val)
buf.put(BINARY)
self.class.serialize_cstr(buf, key)
- buf.put_int(val.length)
- bytes = if RUBY_VERSION >= '1.9'
- val.bytes.to_a
- else
- a = []
- val.each_byte { |byte| a << byte }
- a
- end
- buf.put_array(bytes)
- end
- def serialize_undefined_element(buf, key)
- buf.put(UNDEFINED)
- self.class.serialize_cstr(buf, key)
+ bytes = val.to_a
+ num_bytes = bytes.length
+ subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES
+ if subtype == Binary::SUBTYPE_BYTES
+ buf.put_int(num_bytes + 4)
+ buf.put(subtype)
+ buf.put_int(num_bytes)
+ buf.put_array(bytes)
+ else
+ buf.put_int(num_bytes)
+ buf.put(subtype)
+ buf.put_array(bytes)
+ end
end
def serialize_boolean_element(buf, key, val)
buf.put(BOOLEAN)
self.class.serialize_cstr(buf, key)
@@ -312,31 +398,42 @@
millisecs = (val.to_f * 1000).to_i
buf.put_long(millisecs)
end
def serialize_number_element(buf, key, val, type)
- buf.put(type)
- self.class.serialize_cstr(buf, key)
if type == NUMBER
+ buf.put(type)
+ self.class.serialize_cstr(buf, key)
buf.put_double(val)
else
- buf.put_int(val)
+ if val > 2**64 / 2 - 1 or val < -2**64 / 2
+ raise RangeError.new("MongoDB can only handle 8-byte ints")
+ end
+ if val > 2**32 / 2 - 1 or val < -2**32 / 2
+ buf.put(NUMBER_LONG)
+ self.class.serialize_cstr(buf, key)
+ buf.put_long(val)
+ else
+ buf.put(type)
+ self.class.serialize_cstr(buf, key)
+ buf.put_int(val)
+ end
end
end
- def serialize_object_element(buf, key, val, opcode=OBJECT)
+ def serialize_object_element(buf, key, val, check_keys, opcode=OBJECT)
buf.put(opcode)
self.class.serialize_cstr(buf, key)
- buf.put_array(BSON.new.serialize(val).to_a)
+ buf.put_array(BSON.new.serialize(val, check_keys).to_a)
end
- def serialize_array_element(buf, key, val)
+ def serialize_array_element(buf, key, val, check_keys)
# Turn array into hash with integer indices as keys
h = OrderedHash.new
i = 0
val.each { |v| h[i] = v; i += 1 }
- serialize_object_element(buf, key, h, ARRAY)
+ serialize_object_element(buf, key, h, check_keys, ARRAY)
end
def serialize_regex_element(buf, key, val)
buf.put(REGEX)
self.class.serialize_cstr(buf, key)
@@ -379,53 +476,69 @@
# Go back to where we were
buf.position = end_pos
end
+ def serialize_code_w_scope(buf, key, val)
+ buf.put(CODE_W_SCOPE)
+ self.class.serialize_cstr(buf, key)
+
+ # Make a hole for the length
+ len_pos = buf.position
+ buf.put_int(0)
+
+ buf.put_int(val.length + 1)
+ self.class.serialize_cstr(buf, val)
+ buf.put_array(BSON.new.serialize(val.scope).to_a)
+
+ end_pos = buf.position
+ buf.put_int(end_pos - len_pos, len_pos)
+ buf.position = end_pos
+ end
+
def deserialize_cstr(buf)
chars = ""
- while 1
+ while true
b = buf.get
break if b == 0
chars << b.chr
end
if RUBY_VERSION >= '1.9'
chars.force_encoding("utf-8") # Mongo stores UTF-8
end
chars
end
- def bson_type(o, key)
+ def bson_type(o)
case o
when nil
NULL
when Integer
NUMBER_INT
when Numeric
NUMBER
- when XGen::Mongo::Driver::Binary # must be before String
+ when ByteBuffer
BINARY
+ when Code
+ CODE_W_SCOPE
when String
- # magic awful stuff - the DB requires that a where clause is sent as CODE
- key == "$where" ? CODE : STRING
+ STRING
when Array
ARRAY
when Regexp
REGEX
- when XGen::Mongo::Driver::ObjectID
+ when ObjectID
OID
- when XGen::Mongo::Driver::DBRef
+ when DBRef
REF
when true, false
BOOLEAN
when Time
DATE
when Hash
OBJECT
when Symbol
SYMBOL
- when XGen::Mongo::Driver::Undefined
- UNDEFINED
else
raise "Unknown type of object: #{o.class.name}"
end
end