lib/fluent/plugin/out_kafka_buffered.rb in fluent-plugin-kafka-enchanced-0.5.31 vs lib/fluent/plugin/out_kafka_buffered.rb in fluent-plugin-kafka-enchanced-0.5.32
- old
+ new
@@ -200,27 +200,56 @@
require "avro_turf"
require 'avro_turf/messaging'
require "avro/builder"
init_redis
Proc.new do |tag, time, record|
- record = record.select{|key, value| !key.nil? && !key.empty?}.map{|k, v| [k.tr('[]-', '_').delete('$'), ((!v.is_a?(Fixnum) && !v.is_a?(Float)) ? v.to_s.force_encoding("UTF-8") : v)]}.to_h
+ record = record.select{|key, value| !key.nil? && !key.empty?}.map do |k, v|
+ [
+ k.tr('[]-', '_').delete('$'),
+ (v.is_a?(Fixnum) || v.is_a?(Float) || v.nil? ? v : v.to_s.force_encoding("UTF-8"))
+ ]
+ end.to_h
timestamp = Time.new
record['enchilada_time_with_format'] = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%LZ")
+ @topic_name = schema_name = "#{tag.to_s.tr('.$:', '_')}_#{Digest::MD5.new.hexdigest(record.keys.to_s)[0..5]}"
- fields = record.map{|key, value| {'name' => key, 'type' => (value.is_a?(Fixnum) ? 'int' : (value.is_a?(Float) ? 'float' : 'string'))}}
+ avro = AvroTurf::Messaging.new(registry_url: @schema_registry)
+
+ unless (stored_schema = get_schema_from_redis_by_name(schema_name))
+ fields = record.map do |key, value|
+ {
+ 'name' => key,
+ 'type' => ['null', (value.is_a?(Fixnum) ? 'int' : (value.is_a?(Float) ? 'float' : 'string'))]
+ }
+ end
+ field_types = fields.map{|field| [field['name'], (field['type'] - ['null']).first]}.to_h
+ fields << {"name" => "enchilada_timestamp", "type" => "long"}
+ schema_json = {
+ "type": "record",
+ "name": schema_name,
+ "fields": fields
+ }.to_json
+ registry = avro.instance_variable_get('@registry')
+ schema = Avro::Schema.parse(schema_json)
+ schema_id = registry.register("#{schema_name}-value", schema)
+
+ stored_schema = {
+ 'schema_json' => schema_json,
+ 'schema_id' => schema_id,
+ 'field_types' => field_types
+ }
+
+ set_schema_to_redis(schema_name, stored_schema)
+
+ end
+
record['enchilada_timestamp'] = timestamp.strftime('%s%3N').to_i
- fields << {"name" => "enchilada_timestamp", "type" => "long"}
- @topic_name = schema_name = "#{tag.to_s.tr('.$:', '_')}_#{Digest::MD5.new.hexdigest(fields.to_s)[0..5]}"
- schema_json = {
- "type": "record",
- "name": schema_name,
- "fields": fields
- }.to_json
- schema = Avro::Schema.parse(schema_json)
+ record = record.map do |key, val|
+ [key, (stored_schema['field_types'][key] != 'string' || val.nil? ? val : val.to_s)]
+ end.to_h
- avro = AvroTurf::Messaging.new(registry_url: @schema_registry)
- avro.encode(record, schema: schema, subject: "#{schema_name}-value")
+ avro.encode(record, stored_schema['schema_id'], schema: Avro::Schema.parse(stored_schema['schema_json']))
end
elsif @output_data_type =~ /^attr:(.*)$/
@custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
@custom_attributes.unshift('time') if @output_include_time
@custom_attributes.unshift('tag') if @output_include_tag
@@ -316,10 +345,10 @@
# Looks up a cached schema entry in Redis.
#
# Reads the value stored under +schema_name+ through the global +$redis+
# client and decodes it as JSON.
#
# @param schema_name [String] Redis key the schema entry is stored under
# @return [Hash, nil] a hash holding the 'schema_id', 'schema_json' and
#   'field_types' values of the decoded entry, or nil when Redis returns
#   nothing for the key
# @raise [JSON::ParserError] when the stored value is not valid JSON
def get_schema_from_redis_by_name schema_name
  # Fetch exactly once: a second GET costs another round trip and may come
  # back nil if the key vanishes in between, which would make JSON.parse raise.
  cached_entry = $redis.get(schema_name)
  return unless cached_entry

  parsed_schema = JSON.parse(cached_entry)
  {
    'schema_id' => parsed_schema['schema_id'],
    # Keep the schema as raw JSON text. The formatter that consumes this hash
    # passes 'schema_json' to Avro::Schema.parse itself and stores a plain
    # JSON string under the same key for freshly registered schemas, so
    # parsing here would hand it an already-parsed object on cache hits.
    # NOTE(review): confirm no caller outside this chunk expects a parsed
    # Avro::Schema under this key.
    'schema_json' => parsed_schema['schema_json'],
    'field_types' => parsed_schema['field_types']
  }
end
end