lib/avro/schema.rb in avro-1.7.4 vs lib/avro/schema.rb in avro-1.7.5
- old
+ new
@@ -14,134 +14,156 @@
# See the License for the specific language governing permissions and
# limitations under the License.
module Avro
class Schema
- # FIXME turn these into symbols to prevent some gc pressure
+ # Sets of strings, for backwards compatibility. See below for sets of symbols,
+ # for better performance.
PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double])
NAMED_TYPES = Set.new(%w[fixed enum record error])
VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request])
+ # Symbol counterparts, derived from the string sets above via #to_sym.
+ PRIMITIVE_TYPES_SYM = Set.new(PRIMITIVE_TYPES.map(&:to_sym))
+ NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym))
+ VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym))
+
+ # Inclusive bounds that Schema.validate applies to :int data (-(2**31) .. 2**31 - 1)
+ # and to :long data (-(2**63) .. 2**63 - 1).
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
+ # Builds a schema from JSON text: the string is decoded with Yajl.load and the
+ # result is handed to real_parse together with a fresh, empty names Hash.
def self.parse(json_string)
real_parse(Yajl.load(json_string), {})
end
# Build Avro Schema from data parsed out of JSON string.
+ #
+ # json_obj          - a Hash with a 'type' key, an Array (union), or a String
+ #                     found in PRIMITIVE_TYPES.
+ # names             - handed to the named, array, map and union schema
+ #                     constructors.
+ # default_namespace - used for a named type whose Hash has no 'namespace' key
+ #                     (an explicit key wins, even when its value is nil), and
+ #                     passed on to array, map and union schemas.
+ #
+ # Raises SchemaParseError when a Hash has no 'type', when the type is not in
+ # VALID_TYPES, or when json_obj is none of the accepted shapes.
- def self.real_parse(json_obj, names=nil)
+ def self.real_parse(json_obj, names=nil, default_namespace=nil)
if json_obj.is_a? Hash
type = json_obj['type']
- if PRIMITIVE_TYPES.include?(type)
- return PrimitiveSchema.new(type)
- elsif NAMED_TYPES.include? type
+ raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
+
+ # Check that the type is valid before calling #to_sym, since symbols are never garbage
+ # collected (important to avoid DoS if we're accepting schemas from untrusted clients)
+ unless VALID_TYPES.include?(type)
+ raise SchemaParseError, "Unknown type: #{type}"
+ end
+
+ type_sym = type.to_sym
+ if PRIMITIVE_TYPES_SYM.include?(type_sym)
+ return PrimitiveSchema.new(type_sym)
+
+ elsif NAMED_TYPES_SYM.include? type_sym
name = json_obj['name']
- namespace = json_obj['namespace']
- case type
- when 'fixed'
+ namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace
+ case type_sym
+ when :fixed
size = json_obj['size']
return FixedSchema.new(name, namespace, size, names)
- when 'enum'
+ when :enum
symbols = json_obj['symbols']
return EnumSchema.new(name, namespace, symbols, names)
- when 'record', 'error'
+ when :record, :error
fields = json_obj['fields']
- return RecordSchema.new(name, namespace, fields, names, type)
+ return RecordSchema.new(name, namespace, fields, names, type_sym)
else
raise SchemaParseError.new("Unknown named type: #{type}")
end
- elsif VALID_TYPES.include?(type)
- case type
- when 'array'
- return ArraySchema.new(json_obj['items'], names)
- when 'map'
- return MapSchema.new(json_obj['values'], names)
+
+ else
+ # Remaining members of VALID_TYPES: only array and map have a Hash form here.
+ case type_sym
+ when :array
+ return ArraySchema.new(json_obj['items'], names, default_namespace)
+ when :map
+ return MapSchema.new(json_obj['values'], names, default_namespace)
else
+ # Reached for 'union' and 'request', which pass the VALID_TYPES check above.
raise SchemaParseError.new("Unknown Valid Type: #{type}")
end
- elsif type.nil?
- raise SchemaParseError.new("No \"type\" property: #{json_obj}")
- else
- raise SchemaParseError.new("Undefined type: #{type}")
end
+
elsif json_obj.is_a? Array
# JSON array (union)
- return UnionSchema.new(json_obj, names)
+ return UnionSchema.new(json_obj, names, default_namespace)
elsif PRIMITIVE_TYPES.include? json_obj
return PrimitiveSchema.new(json_obj)
else
msg = "#{json_obj.inspect} is not a schema we know about."
raise SchemaParseError.new(msg)
end
end
# Determine if a ruby datum is an instance of a schema
+ #
+ # expected_schema - schema object; the check is selected by its type_sym.
+ # datum           - the Ruby value to test.
+ #
+ # Returns true or false. Raises RuntimeError when type_sym matches no branch.
def self.validate(expected_schema, datum)
- case expected_schema.type
- when 'null'
+ case expected_schema.type_sym
+ when :null
datum.nil?
- when 'boolean'
+ when :boolean
datum == true || datum == false
- when 'string', 'bytes'
+ when :string, :bytes
datum.is_a? String
- when 'int'
+ when :int
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
+ # Integer is the common superclass of Fixnum and Bignum, so this accepts the
+ # same values while also working on Rubies where those two constants are gone.
+ datum.is_a?(Integer) &&
(INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
- when 'long'
+ when :long
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
+ datum.is_a?(Integer) &&
(LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
- when 'float', 'double'
+ when :float, :double
- datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
+ datum.is_a?(Float) || datum.is_a?(Integer)
- when 'fixed'
+ when :fixed
datum.is_a?(String) && datum.size == expected_schema.size
- when 'enum'
+ when :enum
expected_schema.symbols.include? datum
- when 'array'
+ when :array
datum.is_a?(Array) &&
datum.all?{|d| validate(expected_schema.items, d) }
- when 'map'
+ when :map
+ # Guard first so that a non-Hash datum yields false instead of NoMethodError.
+ datum.is_a?(Hash) &&
datum.keys.all?{|k| k.is_a? String } &&
datum.values.all?{|v| validate(expected_schema.values, v) }
- when 'union'
+ when :union
expected_schema.schemas.any?{|s| validate(s, datum) }
- when 'record', 'error', 'request'
+ when :record, :error, :request
datum.is_a?(Hash) &&
expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
else
raise "you suck #{expected_schema.inspect} is not allowed."
end
end
+ # Stores the schema type in @type_sym; a non-Symbol argument is converted with #to_sym.
def initialize(type)
- @type = type
+ @type_sym = type.is_a?(Symbol) ? type : type.to_sym
end
- def type; @type; end
+ # The schema type as a Symbol, as stored by #initialize.
+ attr_reader :type_sym
+ # Returns the type as a string (rather than a symbol), for backwards compatibility.
+ # Deprecated in favor of {#type_sym}.
+ def type; @type_sym.to_s; end
+
+ # Equal when other is a Schema with the same type_sym.
+ # The seen argument is accepted but not used by this implementation.
def ==(other, seen=nil)
- other.is_a?(Schema) && @type == other.type
+ other.is_a?(Schema) && type_sym == other.type_sym
end
+ # Hash code taken from type_sym alone. The seen argument is not used here.
def hash(seen=nil)
- @type.hash
+ type_sym.hash
end
+ # Resolves a nested schema definition.
+ #
+ # A String is first tried as a reference: when names is given and already
+ # contains the fullname built from json_obj and namespace, that entry is
+ # returned. Everything else is parsed with Schema.real_parse.
+ #
+ # A SchemaParseError from the parse is re-raised as is; any other error caught
+ # by the bare rescue is replaced with a SchemaParseError naming this class and
+ # the offending json_obj.
- def subparse(json_obj, names=nil)
+ def subparse(json_obj, names=nil, namespace=nil)
+ if json_obj.is_a?(String) && names
+ fullname = Name.make_fullname(json_obj, namespace)
+ return names[fullname] if names.include?(fullname)
+ end
+
begin
- Schema.real_parse(json_obj, names)
+ Schema.real_parse(json_obj, names, namespace)
rescue => e
raise e if e.is_a? SchemaParseError
raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}"
end
end
+ # Base representation: a Hash with the single key 'type' holding the type string.
+ # The names argument is not used at this level.
- def to_avro
- {'type' => @type}
+ def to_avro(names=nil)
+ {'type' => type}
end
+ # The to_avro result serialized with Yajl.dump.
def to_s
Yajl.dump to_avro
end
@@ -152,33 +174,37 @@
super(type)
@name, @namespace = Name.extract_namespace(name, namespace)
names = Name.add_name(names, self)
end
+ # names is the Set of fullnames already emitted. When this schema has a name
+ # that is already in the Set, only the fullname String is returned; otherwise
+ # the fullname is added to the Set and the Hash from super is returned, merged
+ # with 'name' and, when @namespace is set, 'namespace'.
- def to_avro
+ def to_avro(names=Set.new)
+ if @name
+ return fullname if names.include?(fullname)
+ names << fullname
+ end
props = {'name' => @name}
props.merge!('namespace' => @namespace) if @namespace
super.merge props
end
+ # Fullname built by Name.make_fullname from @name and @namespace, memoized in @fullname.
def fullname
- Name.make_fullname(@name, @namespace)
+ @fullname ||= Name.make_fullname(@name, @namespace)
end
end
class RecordSchema < NamedSchema
attr_reader :fields
- def self.make_field_objects(field_data, names)
+ def self.make_field_objects(field_data, names, namespace=nil)
field_objects, field_names = [], Set.new
field_data.each_with_index do |field, i|
if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
type = field['type']
name = field['name']
default = field['default']
order = field['order']
- new_field = Field.new(type, name, default, order, names)
+ new_field = Field.new(type, name, default, order, names, namespace)
# make sure field name has not been used yet
if field_names.include?(new_field.name)
raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
end
field_names << new_field.name
@@ -188,157 +214,121 @@
field_objects << new_field
end
field_objects
end
+ # For a request schema (:request or 'request') the superclass initializer is
+ # not called: only @type_sym and @namespace are set. Every other schema_type is
+ # initialized through super. The fields are then built by make_field_objects
+ # with names and this schema's namespace.
- def initialize(name, namespace, fields, names=nil, schema_type='record')
- if schema_type == 'request'
- @type = schema_type
+ def initialize(name, namespace, fields, names=nil, schema_type=:record)
+ if schema_type == :request || schema_type == 'request'
+ @type_sym = schema_type.to_sym
+ @namespace = namespace
else
super(schema_type, name, namespace, names)
end
- @fields = RecordSchema.make_field_objects(fields, names)
+ @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
end
+ # Hash mapping each field's name to its Field object, built on first use and
+ # memoized in @fields_hash.
def fields_hash
- fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
+ @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh }
end
+ # When super returns something other than a Hash (a name reference), that value
+ # is returned untouched. Otherwise 'fields' is filled in from each field's
+ # to_avro; a :request schema returns only that fields Array, every other type
+ # returns the whole Hash.
- def to_avro
- hsh = super.merge('fields' => @fields.map {|f| f.to_avro } )
- if type == 'request'
+ def to_avro(names=Set.new)
+ hsh = super
+ return hsh unless hsh.is_a?(Hash)
+ hsh['fields'] = @fields.map {|f| f.to_avro(names) }
+ if type_sym == :request
hsh['fields']
else
hsh
end
end
end
+ # Schema of type :array; items holds the schema produced by subparse for the
+ # 'items' definition.
class ArraySchema < Schema
- attr_reader :items, :items_schema_from_names
- def initialize(items, names=nil)
- @items_schema_from_names = false
+ attr_reader :items
- super('array')
-
- if items.is_a?(String) && names.has_key?(items)
- @items = names[items]
- @items_schema_from_names = true
- else
- @items = subparse(items, names)
- end
+ # Name lookup and parsing of items are both delegated to subparse.
+ def initialize(items, names=nil, default_namespace=nil)
+ super(:array)
+ @items = subparse(items, names, default_namespace)
end
- def to_avro
- name_or_json = if items_schema_from_names
- items.fullname
- else
- items.to_avro
- end
- super.merge('items' => name_or_json)
+ # Base Hash from super plus 'items', with names passed on to the item schema.
+ def to_avro(names=Set.new)
+ super.merge('items' => items.to_avro(names))
end
end
+ # Schema of type :map; values holds the schema produced by subparse for the
+ # 'values' definition.
class MapSchema < Schema
- attr_reader :values, :values_schema_from_names
+ attr_reader :values
- def initialize(values, names=nil)
- @values_schema_from_names = false
- super('map')
- if values.is_a?(String) && names.has_key?(values)
- values_schema = names[values]
- @values_schema_from_names = true
- else
- values_schema = subparse(values, names)
- end
- @values = values_schema
+ # Name lookup and parsing of values are both delegated to subparse.
+ def initialize(values, names=nil, default_namespace=nil)
+ super(:map)
+ @values = subparse(values, names, default_namespace)
end
- def to_avro
- to_dump = super
- if values_schema_from_names
- to_dump['values'] = values
- else
- to_dump['values'] = values.to_avro
- end
- to_dump
+ # Base Hash from super plus 'values', with names passed on to the value schema.
+ def to_avro(names=Set.new)
+ super.merge('values' => values.to_avro(names))
end
end
+ # Schema of type :union; schemas holds the member schemas in the order given.
class UnionSchema < Schema
- attr_reader :schemas, :schema_from_names_indices
- def initialize(schemas, names=nil)
- super('union')
+ attr_reader :schemas
+ # Parses every member with subparse. Raises SchemaParseError when a type that
+ # is not a named type occurs twice, or when a member is itself a union.
+ def initialize(schemas, names=nil, default_namespace=nil)
+ super(:union)
+
schema_objects = []
- @schema_from_names_indices = []
schemas.each_with_index do |schema, i|
- from_names = false
- if schema.is_a?(String) && names.has_key?(schema)
- new_schema = names[schema]
- from_names = true
- else
- new_schema = subparse(schema, names)
- end
+ new_schema = subparse(schema, names, default_namespace)
+ ns_type = new_schema.type_sym
- ns_type = new_schema.type
- if VALID_TYPES.include?(ns_type) &&
- !NAMED_TYPES.include?(ns_type) &&
- schema_objects.map{|o| o.type }.include?(ns_type)
+ if VALID_TYPES_SYM.include?(ns_type) &&
+ !NAMED_TYPES_SYM.include?(ns_type) &&
+ schema_objects.any?{|o| o.type_sym == ns_type }
raise SchemaParseError, "#{ns_type} is already in Union"
- elsif ns_type == 'union'
+ elsif ns_type == :union
raise SchemaParseError, "Unions cannot contain other unions"
else
schema_objects << new_schema
- @schema_from_names_indices << i if from_names
end
- @schemas = schema_objects
end
+ # Assigned after the loop so that an empty union ends up with [] instead of
+ # nil, which would make #to_avro and Schema.validate raise NoMethodError.
+ @schemas = schema_objects
end
- def to_avro
- # FIXME(jmhodges) this from_name pattern is really weird and
- # seems code-smelly.
- to_dump = []
- schemas.each_with_index do |schema, i|
- if schema_from_names_indices.include?(i)
- to_dump << schema.fullname
- else
- to_dump << schema.to_avro
- end
- end
- to_dump
+ # Array of each member's to_avro result; names is shared across the members.
+ def to_avro(names=Set.new)
+ schemas.map {|schema| schema.to_avro(names) }
end
end
+ # Named schema of type :enum; symbols holds the list given at construction.
class EnumSchema < NamedSchema
attr_reader :symbols
+ # Raises Avro::SchemaParseError when symbols contains a duplicate entry.
def initialize(name, space, symbols, names=nil)
if symbols.uniq.length < symbols.length
- fail_msg = 'Duplicate symbol: %s' % symbols
+ # String#% would treat the Array as the format argument list and print only
+ # its first element, so interpolate the whole list instead.
+ fail_msg = "Duplicate symbol: #{symbols.inspect}"
raise Avro::SchemaParseError, fail_msg
end
- super('enum', name, space, names)
+ super(:enum, name, space, names)
@symbols = symbols
end
- def to_avro
- super.merge('symbols' => symbols)
+ # Adds 'symbols' when super returned a Hash; any other value from super (a
+ # name reference) is returned unchanged.
+ def to_avro(names=Set.new)
+ avro = super
+ avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro
end
end
# Valid primitive types are in PRIMITIVE_TYPES.
class PrimitiveSchema < Schema
+ # Accepts a member of PRIMITIVE_TYPES_SYM as is, or a member of PRIMITIVE_TYPES,
+ # which is converted with #to_sym only after the membership check. Anything
+ # else raises AvroError.
def initialize(type)
- unless PRIMITIVE_TYPES.include? type
+ if PRIMITIVE_TYPES_SYM.include?(type)
+ super(type)
+ elsif PRIMITIVE_TYPES.include?(type)
+ super(type.to_sym)
+ else
raise AvroError.new("#{type} is not a valid primitive type.")
end
-
- super(type)
end
+ # Returns the bare type string when the Hash from super has exactly one entry,
+ # otherwise the Hash itself.
- def to_avro
+ def to_avro(names=nil)
hsh = super
hsh.size == 1 ? type : hsh
end
end
@@ -347,43 +337,34 @@
+ # size must be an Integer, otherwise AvroError is raised. name, space and names
+ # are handed to the superclass initializer together with the :fixed type.
def initialize(name, space, size, names=nil)
# Ensure valid cto args
- unless size.is_a?(Fixnum) || size.is_a?(Bignum)
+ # Integer is the common superclass of Fixnum and Bignum, so this accepts the
+ # same values while also working on Rubies where those two constants are gone.
+ unless size.is_a?(Integer)
raise AvroError, 'Fixed Schema requires a valid integer for size property.'
end
- super('fixed', name, space, names)
+ super(:fixed, name, space, names)
@size = size
end
+ # Adds 'size' when super returned a Hash; any other value from super (a name
+ # reference) is returned unchanged.
+ # NOTE(review): size is presumably an attr_reader declared outside this hunk - confirm.
- def to_avro
- super.merge('size' => @size)
+ def to_avro(names=Set.new)
+ avro = super
+ avro.is_a?(Hash) ? avro.merge('size' => size) : avro
end
end
+ # One field of a record. Here type is the field's schema object (the result of
+ # subparse), not a type string; the attr_reader below replaces Schema#type.
class Field < Schema
- attr_reader :type, :name, :default, :order, :type_from_names
- def initialize(type, name, default=nil, order=nil, names=nil)
- @type_from_names = false
- if type.is_a?(String) && names && names.has_key?(type)
- type_schema = names[type]
- @type_from_names = true
- else
- type_schema = subparse(type, names)
- end
- @type = type_schema
+ attr_reader :type, :name, :default, :order
+
+ # Name lookup and parsing of the field type are both delegated to subparse.
+ def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil)
+ @type = subparse(type, names, namespace)
@name = name
@default = default
@order = order
end
- def to_avro
- sigh_type = type_from_names ? type.fullname : type.to_avro
- hsh = {
- 'name' => name,
- 'type' => sigh_type
- }
- hsh['default'] = default if default
- hsh['order'] = order if order
- hsh
+ # Hash with 'name' and 'type', plus 'default' and 'order' when they are set.
+ # A default of false is kept; only nil, which cannot be told apart from "no
+ # default given", is left out.
+ def to_avro(names=Set.new)
+ {'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
+ avro['default'] = default unless default.nil?
+ avro['order'] = order if order
+ end
end
end
end
+ # Raised by the schema parsing code for missing, unknown or malformed schema definitions.
class SchemaParseError < AvroError; end