lib/fluent/plugin/out_kafka_buffered.rb in fluent-plugin-kafka-enchanced-0.5.31 vs lib/fluent/plugin/out_kafka_buffered.rb in fluent-plugin-kafka-enchanced-0.5.32

- old
+ new

@@ -200,27 +200,56 @@
   require "avro_turf"
   require 'avro_turf/messaging'
   require "avro/builder"
   init_redis
   Proc.new do |tag, time, record|
-    record = record.select{|key, value| !key.nil? && !key.empty?}.map{|k, v| [k.tr('[]-', '_').delete('$'), ((!v.is_a?(Fixnum) && !v.is_a?(Float)) ? v.to_s.force_encoding("UTF-8") : v)]}.to_h
+    record = record.select{|key, value| !key.nil? && !key.empty?}.map do |k, v|
+      [
+        k.tr('[]-', '_').delete('$'),
+        (v.is_a?(Fixnum) || v.is_a?(Float) || v.nil? ? v : v.to_s.force_encoding("UTF-8"))
+      ]
+    end.to_h
     timestamp = Time.new
     record['enchilada_time_with_format'] = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%LZ")
+    @topic_name = schema_name = "#{tag.to_s.tr('.$:', '_')}_#{Digest::MD5.new.hexdigest(record.keys.to_s)[0..5]}"
-    fields = record.map{|key, value| {'name' => key, 'type' => (value.is_a?(Fixnum) ? 'int' : (value.is_a?(Float) ? 'float' : 'string'))}}
+    avro = AvroTurf::Messaging.new(registry_url: @schema_registry)
+
+    unless (stored_schema = get_schema_from_redis_by_name(schema_name))
+      fields = record.map do |key, value|
+        {
+          'name' => key,
+          'type' => ['null', (value.is_a?(Fixnum) ? 'int' : (value.is_a?(Float) ? 'float' : 'string'))]
+        }
+      end
+      field_types = fields.map{|field| [field['name'], (field['type'] - ['null']).first]}.to_h
+      fields << {"name" => "enchilada_timestamp", "type" => "long"}
+      schema_json = {
+        "type": "record",
+        "name": schema_name,
+        "fields": fields
+      }.to_json
+      registry = avro.instance_variable_get('@registry')
+      schema = Avro::Schema.parse(schema_json)
+      schema_id = registry.register("#{schema_name}-value", schema)
+
+      stored_schema = {
+        'schema_json' => schema_json,
+        'schema_id' => schema_id,
+        'field_types' => field_types
+      }
+
+      set_schema_to_redis(schema_name, stored_schema)
+
+    end
+
     record['enchilada_timestamp'] = timestamp.strftime('%s%3N').to_i
-    fields << {"name" => "enchilada_timestamp", "type" => "long"}
-    @topic_name = schema_name = "#{tag.to_s.tr('.$:', '_')}_#{Digest::MD5.new.hexdigest(fields.to_s)[0..5]}"
-    schema_json = {
-      "type": "record",
-      "name": schema_name,
-      "fields": fields
-    }.to_json
-    schema = Avro::Schema.parse(schema_json)
+    record = record.map do |key, val|
+      [key, (stored_schema['field_types'][key] != 'string' || val.nil? ? val : val.to_s)]
+    end.to_h
-    avro = AvroTurf::Messaging.new(registry_url: @schema_registry)
-    avro.encode(record, schema: schema, subject: "#{schema_name}-value")
+    avro.encode(record, stored_schema['schema_id'], schema: Avro::Schema.parse(stored_schema['schema_json']))
   end
 elsif @output_data_type =~ /^attr:(.*)$/
   @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
   @custom_attributes.unshift('time') if @output_include_time
   @custom_attributes.unshift('tag') if @output_include_tag
@@ -316,10 +345,10 @@
   def get_schema_from_redis_by_name schema_name
     if stored_schema = $redis.get(schema_name)
       parsed_schema = JSON.parse($redis.get(schema_name))
       {
         'schema_id' => parsed_schema['schema_id'],
-        'schema' => Avro::Schema.parse(parsed_schema['schema']),
+        'schema_json' => Avro::Schema.parse(parsed_schema['schema_json']),
         'field_types' => parsed_schema['field_types']
       }
     end
   end
 end