Sha256: f453d192e8ba50084a104dedf7f6884d3a0637339db5a419e8fad90e9fb025fa

Contents?: true

Size: 1.66 KB

Versions: 6

Compression:

Stored size: 1.66 KB

Contents

# frozen_string_literal: true

module JobIteration
  # CsvEnumerator makes it possible to write an Iteration job
  # that uses a CSV file as the collection to iterate over.
  #
  # Both {#rows} and {#batches} yield +[element, index]+ pairs; the index is
  # the zero-based position of the row (or batch) within the CSV.
  # @example
  #   def build_enumerator(cursor:)
  #     csv = CSV.open('tmp/files', converters: :integer, headers: true)
  #     JobIteration::CsvEnumerator.new(csv).rows(cursor: cursor)
  #   end
  #
  #   def each_iteration(row)
  #     ...
  #   end
  class CsvEnumerator
    # Constructs a CsvEnumerator instance based on a CSV object.
    # @param [CSV] csv An instance of CSV (or of a CSV subclass)
    # @return [JobIteration::CsvEnumerator]
    # @raise [ArgumentError] if +csv+ is not a CSV object
    # @example
    #   csv = CSV.open('tmp/files', converters: :integer, headers: true)
    #   JobIteration::CsvEnumerator.new(csv).rows(cursor: cursor)
    def initialize(csv)
      raise ArgumentError, "CsvEnumerator.new takes CSV object" unless csv.is_a?(CSV)

      @csv = csv
    end

    # Constructs an enumerator over the CSV rows.
    #
    # The enumerator's +size+ is computed lazily from the backing file and is
    # +nil+ when the CSV is not backed by a file path.
    #
    # NOTE(review): +drop(cursor.to_i)+ skips the first +cursor+ elements, so
    # the row whose index equals +cursor+ is yielded again on resume. Confirm
    # this matches how callers persist cursors.
    # @param [Integer, nil] cursor number of leading rows to skip (nil => 0)
    # @return [Enumerator] lazy enumerator yielding +[row, index]+
    def rows(cursor:)
      @csv.lazy
        .each_with_index
        .drop(cursor.to_i)
        .to_enum { count_rows_in_file }
    end

    # Constructs an enumerator over batches of CSV rows.
    #
    # The enumerator's +size+ is the number of batches in the backing file, or
    # +nil+ when the number of rows cannot be determined.
    # @param [Integer] batch_size maximum number of rows per batch
    # @param [Integer, nil] cursor number of leading batches to skip (nil => 0)
    # @return [Enumerator] lazy enumerator yielding +[rows, index]+
    def batches(batch_size:, cursor:)
      @csv.lazy
        .each_slice(batch_size)
        .each_with_index
        .drop(cursor.to_i)
        .to_enum { count_batches_in_file(batch_size) }
    end

    private

    # Number of batches needed to cover every row, or nil when the row count
    # is unknown (so an unknown size is not misreported as zero).
    def count_batches_in_file(batch_size)
      row_count = count_rows_in_file
      return unless row_count

      (row_count.to_f / batch_size).ceil
    end

    # Counts the data rows of the backing file using `wc -l`.
    #
    # `wc -l` counts newline characters, so a final line without a trailing
    # newline is not counted.
    # @return [Integer, nil] row count, or nil when it cannot be determined
    def count_rows_in_file
      filepath = csv_path
      return unless filepath

      # argv form plus `in:` redirection: the path never goes through a shell,
      # so spaces and metacharacters in it are harmless.
      count = IO.popen(["wc", "-l"], in: filepath, &:read).to_i
      count -= 1 if @csv.headers
      # An empty file with headers enabled would otherwise report -1.
      [count, 0].max
    rescue SystemCallError
      # `wc` unavailable or file unreadable: the size is simply unknown.
      nil
    end

    # Path of the file behind the CSV, or nil when there is none.
    # Depending on the csv library version, CSV#path either returns nil or
    # raises NoMethodError for IO objects that have no path (e.g. StringIO).
    def csv_path
      @csv.path
    rescue NoMethodError
      nil
    end
  end
end

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
job-iteration-0.9.6 lib/job-iteration/csv_enumerator.rb
job-iteration-0.9.5 lib/job-iteration/csv_enumerator.rb
job-iteration-0.9.4 lib/job-iteration/csv_enumerator.rb
job-iteration-0.9.3 lib/job-iteration/csv_enumerator.rb
job-iteration-0.9.2 lib/job-iteration/csv_enumerator.rb
job-iteration-0.9.1 lib/job-iteration/csv_enumerator.rb