Sha256: 975d6e6ca3a7516bfa872832a6b86200ad8cbe41cb05ca8d23d95d039def26c9

Contents?: true

Size: 1.44 KB

Versions: 5

Compression:

Stored size: 1.44 KB

Contents

require 'set'

module Chicago
  module ETL
    # An end point to write rows.
    #
    # Subclasses provide the actual output behaviour by overriding
    # the protected write method (and flush, where buffering is
    # used). This base class only tracks which rows have already
    # been written, so that duplicates can be skipped.
    #
    # @abstract
    # @api public
    class Sink
      # Returns the column names expected to be written to this sink.
      # @api public
      attr_reader :column_names

      # Creates a sink wrapping an output.
      #
      # @param output the object rows are ultimately written to;
      #   only made available to subclasses via the protected output
      #   reader, never used directly by this class.
      # @param column_names the column names expected to be written.
      # @param unique_row_key the key used to look up, in each row,
      #   the value that identifies the row as unique. When nil (the
      #   default) no duplicate detection takes place.
      # @abstract
      def initialize(output, column_names, unique_row_key=nil)
        @output = output
        @column_names = column_names
        @written_rows = Set.new
        @unique_row_key = unique_row_key
      end

      # Writes a row to the output.
      #
      # Row will not be written to the output if it has already been
      # written, as identified by the unique row key.
      #
      # Should not be overridden by subclasses - override write instead.
      #
      # @param row the row to write; must respond to [] with the
      #   unique row key when one has been provided.
      def <<(row)
        unless written?(row)
          write row
          # Only record the key when uniqueness is being tracked:
          # indexing a row with a nil key is meaningless for hashes
          # and raises TypeError for array-like rows.
          @written_rows << row[@unique_row_key] unless @unique_row_key.nil?
        end
      end

      # Flushes any remaining writes to the output.
      #
      # By default does nothing, subclasses should override where
      # necessary.
      def flush
      end

      # Returns true if this row has previously been written to the
      # output.
      #
      # Always returns false if no key to determine row uniqueness has
      # been provided.
      #
      # @param row the row to check.
      def written?(row)
        return false if @unique_row_key.nil?
        @written_rows.include?(row[@unique_row_key])
      end

      protected

      # The output passed to the constructor, for use by subclasses.
      attr_reader :output

      # Performs the actual write of a row. Does nothing by default.
      #
      # @abstract
      def write(row)
      end
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
chicago-etl-0.0.13 lib/chicago/etl/sink.rb
chicago-etl-0.0.12 lib/chicago/etl/sink.rb
chicago-etl-0.0.11 lib/chicago/etl/sink.rb
chicago-etl-0.0.10 lib/chicago/etl/sink.rb
chicago-etl-0.0.9 lib/chicago/etl/sink.rb