Sha256: 01727f4a3c4259237e5c5b721f26c9ae3e39ff53bc34d900ce1d332a6a9447a0

Contents?: true

Size: 1.52 KB

Versions: 17

Compression:

Stored size: 1.52 KB

Contents

require 'csv'
require 'tempfile'

require 'msgpack'

require 'arc-furnace/sink'

module ArcFurnace
  # A Sink that writes every field seen across all rows to a CSV file.
  #
  # Rows handed to #row are buffered in a temporary file as MessagePack while
  # the sink records, per key, the largest number of values any single row
  # carried for that key. #finalize then writes a header row that repeats each
  # key that many times and replays the buffered rows, padding with nil so
  # that every row has the same number of columns as the header.
  class AllFieldsCSVSink < Sink
    # NOTE(review): :field_mappings is never assigned in this class; the reader
    # is kept only so that any subclass relying on it keeps working.
    private_attr_reader :csv, :fields, :tmp_file, :packer, :field_mappings

    # @param filename [String] path of the CSV file to create
    # @param encoding [String] encoding passed to CSV.open for the output file
    # @param force_quotes [Boolean] passed to CSV.open as +force_quotes+
    def initialize(filename: , encoding: 'UTF-8', force_quotes: false)
      @tmp_file = Tempfile.new('intermediate_results', encoding: 'binary')
      @packer = MessagePack::Packer.new(tmp_file)
      @csv = CSV.open(filename, 'wb', encoding: encoding, headers: true, force_quotes: force_quotes)
      # key => maximum number of values seen for that key in a single row
      @fields = {}
    end

    # Writes the header row followed by every buffered row, then releases the
    # CSV handle and deletes the intermediate temp file. Both resources are
    # released even when writing raises.
    #
    # @return [nil]
    def finalize
      packer.flush
      tmp_file.rewind

      write_header_row!
      MessagePack::Unpacker.new(tmp_file).each { |hash| write_row(hash) }

      nil
    ensure
      begin
        csv.close
      ensure
        # close! closes and unlinks the temp file, so it does not linger
        # on disk until the Tempfile object is garbage collected.
        tmp_file.close!
      end
    end

    # Buffers one row and updates the per-key column counts.
    #
    # @param hash [Hash] the row; a value may be a single object or a list
    def row(hash)
      update_field_counts(hash)
      packer.write(hash)
    end

    private

    # Emits the header: each key repeated once per column reserved for it.
    def write_header_row!
      csv << fields.flat_map { |key, count| [key] * count }
    end

    # Emits one buffered row, giving every key exactly as many cells as the
    # header reserved for it (missing values become nil).
    def write_row(hash)
      cells = fields.flat_map do |key, count|
        # Buffered rows are looked up by the string form of the key —
        # presumably because keys come back from MessagePack as strings;
        # TODO confirm.
        values = Array.wrap(hash[key.to_s])
        Array.new(count) { |index| values[index] }
      end
      csv << cells
    end

    # Raises the recorded column count for each key in +hash+ when this row
    # carries more values for it than any previous row. A key is only recorded
    # once some row has a value count above zero for it.
    def update_field_counts(hash)
      hash.each do |key, values|
        value_count = Array.wrap(values).size
        fields[key] = value_count if value_count > fields.fetch(key, 0)
      end
    end
  end
end

Version data entries

17 entries across 17 versions & 1 rubygem

Version Path
arc-furnace-0.1.36 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.35 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.34 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.33 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.32 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.31 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.30 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.29 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.28 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.27 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.26 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.25 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.24 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.23 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.22 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.21 lib/arc-furnace/all_fields_csv_sink.rb
arc-furnace-0.1.20 lib/arc-furnace/all_fields_csv_sink.rb