lib/chronicle/etl/loaders/json_loader.rb in chronicle-etl-0.5.5 vs lib/chronicle/etl/loaders/json_loader.rb in chronicle-etl-0.6.1

- old
+ new

@@ -1,17 +1,19 @@ +# frozen_string_literal: true + require 'tempfile' module Chronicle module ETL class JSONLoader < Chronicle::ETL::Loader include Chronicle::ETL::Loaders::Helpers::StdoutHelper register_connector do |r| + r.identifier = :json r.description = 'json' end - setting :serializer setting :output # If true, one JSON record per line. If false, output a single json # object with an array of records setting :line_separated, default: true, type: :boolean @@ -24,27 +26,28 @@ def start @output_file = if output_to_stdout? create_stdout_temp_file else - File.open(@config.output, "w+") + File.open(@config.output, 'w+') end @output_file.puts("[\n") unless @config.line_separated end def load(record) - serialized = serializer.serialize(record) + serialized = record.to_h # When dealing with raw data, we can get improperly encoded strings # (eg from sqlite database columns). We force conversion to UTF-8 # before converting into JSON - encoded = serialized.transform_values do |value| - next value unless value.is_a?(String) + # encoded = serialized.transform_values do |value| + # next value unless value.is_a?(String) - force_utf8(value) - end + # force_utf8(value) + # end + encoded = deeply_force_utf8(serialized) line = encoded.to_json # For line-separated output, we just put json + newline if @config.line_separated line = "#{line}\n" @@ -55,10 +58,12 @@ end @output_file.write(line) @first_line = false + # rescue StandardError => e + # binding.pry end def finish # Close the array unless we're doing line-separated JSON @output_file.puts("\n]") unless @config.line_separated @@ -68,12 +73,27 @@ @output_file.close end private - # TODO: implement this - def serializer - @config.serializer || Chronicle::ETL::RawSerializer + # TODO: Move this to a helper module + def deeply_force_utf8(hash) + # FIXME: probably shouldn't happen but it does + return hash.map { |x| force_utf8(x) } if hash.is_a?(Array) + return force_utf8(hash) unless hash.is_a?(Hash) + + hash.transform_values do |value| + case value + when String + force_utf8(value) + when Hash + deeply_force_utf8(value) + when Array + value.map { |v| deeply_force_utf8(v) } + else + value + end + end end end end end