lib/avro/schema.rb in avro-1.7.4 vs lib/avro/schema.rb in avro-1.7.5

- old
+ new

@@ -14,134 +14,156 @@ # See the License for the specific language governing permissions and # limitations under the License. module Avro class Schema - # FIXME turn these into symbols to prevent some gc pressure + # Sets of strings, for backwards compatibility. See below for sets of symbols, + # for better performance. PRIMITIVE_TYPES = Set.new(%w[null boolean string bytes int long float double]) NAMED_TYPES = Set.new(%w[fixed enum record error]) VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + Set.new(%w[array map union request]) + PRIMITIVE_TYPES_SYM = Set.new(PRIMITIVE_TYPES.map(&:to_sym)) + NAMED_TYPES_SYM = Set.new(NAMED_TYPES.map(&:to_sym)) + VALID_TYPES_SYM = Set.new(VALID_TYPES.map(&:to_sym)) + INT_MIN_VALUE = -(1 << 31) INT_MAX_VALUE = (1 << 31) - 1 LONG_MIN_VALUE = -(1 << 63) LONG_MAX_VALUE = (1 << 63) - 1 def self.parse(json_string) real_parse(Yajl.load(json_string), {}) end # Build Avro Schema from data parsed out of JSON string. - def self.real_parse(json_obj, names=nil) + def self.real_parse(json_obj, names=nil, default_namespace=nil) if json_obj.is_a? Hash type = json_obj['type'] - if PRIMITIVE_TYPES.include?(type) - return PrimitiveSchema.new(type) - elsif NAMED_TYPES.include? type + raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil? + + # Check that the type is valid before calling #to_sym, since symbols are never garbage + # collected (important to avoid DoS if we're accepting schemas from untrusted clients) + unless VALID_TYPES.include?(type) + raise SchemaParseError, "Unknown type: #{type}" + end + + type_sym = type.to_sym + if PRIMITIVE_TYPES_SYM.include?(type_sym) + return PrimitiveSchema.new(type_sym) + + elsif NAMED_TYPES_SYM.include? type_sym name = json_obj['name'] - namespace = json_obj['namespace'] - case type - when 'fixed' + namespace = json_obj.include?('namespace') ? json_obj['namespace'] : default_namespace + case type_sym + when :fixed size = json_obj['size'] return FixedSchema.new(name, namespace, size, names) - when 'enum' + when :enum symbols = json_obj['symbols'] return EnumSchema.new(name, namespace, symbols, names) - when 'record', 'error' + when :record, :error fields = json_obj['fields'] - return RecordSchema.new(name, namespace, fields, names, type) + return RecordSchema.new(name, namespace, fields, names, type_sym) else raise SchemaParseError.new("Unknown named type: #{type}") end - elsif VALID_TYPES.include?(type) - case type - when 'array' - return ArraySchema.new(json_obj['items'], names) - when 'map' - return MapSchema.new(json_obj['values'], names) + + else + case type_sym + when :array + return ArraySchema.new(json_obj['items'], names, default_namespace) + when :map + return MapSchema.new(json_obj['values'], names, default_namespace) else raise SchemaParseError.new("Unknown Valid Type: #{type}") end - elsif type.nil? - raise SchemaParseError.new("No \"type\" property: #{json_obj}") - else - raise SchemaParseError.new("Undefined type: #{type}") end + elsif json_obj.is_a? Array # JSON array (union) - return UnionSchema.new(json_obj, names) + return UnionSchema.new(json_obj, names, default_namespace) elsif PRIMITIVE_TYPES.include? json_obj return PrimitiveSchema.new(json_obj) else msg = "#{json_obj.inspect} is not a schema we know about." raise SchemaParseError.new(msg) end end # Determine if a ruby datum is an instance of a schema def self.validate(expected_schema, datum) - case expected_schema.type - when 'null' + case expected_schema.type_sym + when :null datum.nil? - when 'boolean' + when :boolean datum == true || datum == false - when 'string', 'bytes' + when :string, :bytes datum.is_a? String - when 'int' + when :int (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) && (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE) - when 'long' + when :long (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) && (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE) - when 'float', 'double' + when :float, :double datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum) - when 'fixed' + when :fixed datum.is_a?(String) && datum.size == expected_schema.size - when 'enum' + when :enum expected_schema.symbols.include? datum - when 'array' + when :array datum.is_a?(Array) && datum.all?{|d| validate(expected_schema.items, d) } - when 'map' + when :map datum.keys.all?{|k| k.is_a? String } && datum.values.all?{|v| validate(expected_schema.values, v) } - when 'union' + when :union expected_schema.schemas.any?{|s| validate(s, datum) } - when 'record', 'error', 'request' + when :record, :error, :request datum.is_a?(Hash) && expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) } else raise "you suck #{expected_schema.inspect} is not allowed." end end def initialize(type) - @type = type + @type_sym = type.is_a?(Symbol) ? type : type.to_sym end - def type; @type; end + attr_reader :type_sym + # Returns the type as a string (rather than a symbol), for backwards compatibility. + # Deprecated in favor of {#type_sym}. + def type; @type_sym.to_s; end + def ==(other, seen=nil) - other.is_a?(Schema) && @type == other.type + other.is_a?(Schema) && type_sym == other.type_sym end def hash(seen=nil) - @type.hash + type_sym.hash end - def subparse(json_obj, names=nil) + def subparse(json_obj, names=nil, namespace=nil) + if json_obj.is_a?(String) && names + fullname = Name.make_fullname(json_obj, namespace) + return names[fullname] if names.include?(fullname) + end + begin - Schema.real_parse(json_obj, names) + Schema.real_parse(json_obj, names, namespace) rescue => e raise e if e.is_a? SchemaParseError raise SchemaParseError, "Sub-schema for #{self.class.name} not a valid Avro schema. Bad schema: #{json_obj}" end end - def to_avro - {'type' => @type} + def to_avro(names=nil) + {'type' => type} end def to_s Yajl.dump to_avro end @@ -152,33 +174,37 @@ super(type) @name, @namespace = Name.extract_namespace(name, namespace) names = Name.add_name(names, self) end - def to_avro + def to_avro(names=Set.new) + if @name + return fullname if names.include?(fullname) + names << fullname + end props = {'name' => @name} props.merge!('namespace' => @namespace) if @namespace super.merge props end def fullname - Name.make_fullname(@name, @namespace) + @fullname ||= Name.make_fullname(@name, @namespace) end end class RecordSchema < NamedSchema attr_reader :fields - def self.make_field_objects(field_data, names) + def self.make_field_objects(field_data, names, namespace=nil) field_objects, field_names = [], Set.new field_data.each_with_index do |field, i| if field.respond_to?(:[]) # TODO(jmhodges) wtffffff type = field['type'] name = field['name'] default = field['default'] order = field['order'] - new_field = Field.new(type, name, default, order, names) + new_field = Field.new(type, name, default, order, names, namespace) # make sure field name has not been used yet if field_names.include?(new_field.name) raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use" end field_names << new_field.name @@ -188,157 +214,121 @@ field_objects << new_field end field_objects end - def initialize(name, namespace, fields, names=nil, schema_type='record') - if schema_type == 'request' - @type = schema_type + def initialize(name, namespace, fields, names=nil, schema_type=:record) + if schema_type == :request || schema_type == 'request' + @type_sym = schema_type.to_sym + @namespace = namespace else super(schema_type, name, namespace, names) end - @fields = RecordSchema.make_field_objects(fields, names) + @fields = RecordSchema.make_field_objects(fields, names, self.namespace) end def fields_hash - fields.inject({}){|hsh, field| hsh[field.name] = field; hsh } + @fields_hash ||= fields.inject({}){|hsh, field| hsh[field.name] = field; hsh } end - def to_avro - hsh = super.merge('fields' => @fields.map {|f| f.to_avro } ) - if type == 'request' + def to_avro(names=Set.new) + hsh = super + return hsh unless hsh.is_a?(Hash) + hsh['fields'] = @fields.map {|f| f.to_avro(names) } + if type_sym == :request hsh['fields'] else hsh end end end class ArraySchema < Schema - attr_reader :items, :items_schema_from_names - def initialize(items, names=nil) - @items_schema_from_names = false + attr_reader :items - super('array') - - if items.is_a?(String) && names.has_key?(items) - @items = names[items] - @items_schema_from_names = true - else - @items = subparse(items, names) - end + def initialize(items, names=nil, default_namespace=nil) + super(:array) + @items = subparse(items, names, default_namespace) end - def to_avro - name_or_json = if items_schema_from_names - items.fullname - else - items.to_avro - end - super.merge('items' => name_or_json) + def to_avro(names=Set.new) + super.merge('items' => items.to_avro(names)) end end class MapSchema < Schema - attr_reader :values, :values_schema_from_names + attr_reader :values - def initialize(values, names=nil) - @values_schema_from_names = false - super('map') - if values.is_a?(String) && names.has_key?(values) - values_schema = names[values] - @values_schema_from_names = true - else - values_schema = subparse(values, names) - end - @values = values_schema + def initialize(values, names=nil, default_namespace=nil) + super(:map) + @values = subparse(values, names, default_namespace) end - def to_avro - to_dump = super - if values_schema_from_names - to_dump['values'] = values - else - to_dump['values'] = values.to_avro - end - to_dump + def to_avro(names=Set.new) + super.merge('values' => values.to_avro(names)) end end class UnionSchema < Schema - attr_reader :schemas, :schema_from_names_indices - def initialize(schemas, names=nil) - super('union') + attr_reader :schemas + def initialize(schemas, names=nil, default_namespace=nil) + super(:union) + schema_objects = [] - @schema_from_names_indices = [] schemas.each_with_index do |schema, i| - from_names = false - if schema.is_a?(String) && names.has_key?(schema) - new_schema = names[schema] - from_names = true - else - new_schema = subparse(schema, names) - end + new_schema = subparse(schema, names, default_namespace) + ns_type = new_schema.type_sym - ns_type = new_schema.type - if VALID_TYPES.include?(ns_type) && - !NAMED_TYPES.include?(ns_type) && - schema_objects.map{|o| o.type }.include?(ns_type) + if VALID_TYPES_SYM.include?(ns_type) && + !NAMED_TYPES_SYM.include?(ns_type) && + schema_objects.any?{|o| o.type_sym == ns_type } raise SchemaParseError, "#{ns_type} is already in Union" - elsif ns_type == 'union' + elsif ns_type == :union raise SchemaParseError, "Unions cannot contain other unions" else schema_objects << new_schema - @schema_from_names_indices << i if from_names end @schemas = schema_objects end end - def to_avro - # FIXME(jmhodges) this from_name pattern is really weird and - # seems code-smelly. - to_dump = [] - schemas.each_with_index do |schema, i| - if schema_from_names_indices.include?(i) - to_dump << schema.fullname - else - to_dump << schema.to_avro - end - end - to_dump + def to_avro(names=Set.new) + schemas.map {|schema| schema.to_avro(names) } end end class EnumSchema < NamedSchema attr_reader :symbols def initialize(name, space, symbols, names=nil) if symbols.uniq.length < symbols.length fail_msg = 'Duplicate symbol: %s' % symbols raise Avro::SchemaParseError, fail_msg end - super('enum', name, space, names) + super(:enum, name, space, names) @symbols = symbols end - def to_avro - super.merge('symbols' => symbols) + def to_avro(names=Set.new) + avro = super + avro.is_a?(Hash) ? avro.merge('symbols' => symbols) : avro end end # Valid primitive types are in PRIMITIVE_TYPES. class PrimitiveSchema < Schema def initialize(type) - unless PRIMITIVE_TYPES.include? type + if PRIMITIVE_TYPES_SYM.include?(type) + super(type) + elsif PRIMITIVE_TYPES.include?(type) + super(type.to_sym) + else raise AvroError.new("#{type} is not a valid primitive type.") end - - super(type) end - def to_avro + def to_avro(names=nil) hsh = super hsh.size == 1 ? type : hsh end end @@ -347,43 +337,34 @@ def initialize(name, space, size, names=nil) # Ensure valid cto args unless size.is_a?(Fixnum) || size.is_a?(Bignum) raise AvroError, 'Fixed Schema requires a valid integer for size property.' end - super('fixed', name, space, names) + super(:fixed, name, space, names) @size = size end - def to_avro - super.merge('size' => @size) + def to_avro(names=Set.new) + avro = super + avro.is_a?(Hash) ? avro.merge('size' => size) : avro end end class Field < Schema - attr_reader :type, :name, :default, :order, :type_from_names - def initialize(type, name, default=nil, order=nil, names=nil) - @type_from_names = false - if type.is_a?(String) && names && names.has_key?(type) - type_schema = names[type] - @type_from_names = true - else - type_schema = subparse(type, names) - end - @type = type_schema + attr_reader :type, :name, :default, :order + + def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil) + @type = subparse(type, names, namespace) @name = name @default = default @order = order end - def to_avro - sigh_type = type_from_names ? type.fullname : type.to_avro - hsh = { - 'name' => name, - 'type' => sigh_type - } - hsh['default'] = default if default - hsh['order'] = order if order - hsh + def to_avro(names=Set.new) + {'name' => name, 'type' => type.to_avro(names)}.tap do |avro| + avro['default'] = default if default + avro['order'] = order if order + end end end end class SchemaParseError < AvroError; end