Sha256: 6d5ecf3fe574933e53a83d521ebf4a0f98557f4ebb44f109822b14d94defa271

Contents?: true

Size: 1.32 KB

Versions: 3

Compression:

Stored size: 1.32 KB

Contents

# keep, for each value of a grouping column, the row with the maximum value in another column
require "csv"

# Reduces delimited text to one row per group: for every distinct value of
# the grouping column, only the row with the greatest value in the "max"
# column is kept.
class Masticate::MaxRows < Masticate::Base
  # Reads rows, keeps the best row per group, and writes the header followed
  # by the surviving rows.
  #
  # Recognised keys in +opts+:
  #   :by         - name of the header column to group by (required)
  #   :max        - name of the header column to maximise (required)
  #   :output     - path of a file to write; $stdout is used when absent
  #   :col_sep    - column separator handed to the CSV parser (optional)
  #   :quote_char - quote character handed to the CSV parser; defaults to "\0"
  #                 (presumably to switch quote handling off -- confirm)
  #
  # Input is obtained through +with_input+ / +get+ and written through +emit+,
  # none of which are defined in this class (presumably Masticate::Base).
  #
  # Returns a Hash with :input_count and :output_count.
  # Raises RuntimeError when a required option is missing or a named column is
  # not present in the header row.
  def maxrows(opts)
    # Validate before touching the output so a bad call does not truncate it.
    groupby = opts[:by] or raise "missing field to group by"
    maxon = opts[:max] or raise "missing field to max on"

    csv_options = {}
    csv_options[:col_sep] = opts[:col_sep] if opts[:col_sep]
    csv_options[:quote_char] = opts[:quote_char] || "\0"

    @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
    @output_count = 0
    headers = nil
    # Declared out here so they survive if with_input yields more than once.
    index_by = nil
    index_max = nil
    accum = {}

    begin
      with_input do |_input|
        while line = get
          # Options go in as keywords; a positional Hash is rejected on Ruby 3.
          row = CSV.parse_line(line, **csv_options)
          next if row.nil? # blank line: nothing to group

          if !headers
            headers = row
            index_by = headers.index(groupby) or raise "Unable to find column '#{groupby}'"
            index_max = headers.index(maxon) or raise "Unable to find column '#{maxon}'"
            emit(line)
          else
            key = row[index_by]
            best = accum[key]
            # Ties keep the earliest row, because the comparison is strict.
            if best.nil? || score_exceeds?(row[index_max], best[index_max])
              accum[key] = row
            end
          end
        end
      end

      # NOTE(review): rows are re-serialised with CSV's default separator and
      # quoting while the header line is emitted verbatim; confirm that is
      # intended when :col_sep is set.
      accum.each_value do |row|
        emit(row.to_csv)
      end
    ensure
      # Only close what this method opened; never close $stdout.
      @output.close if opts[:output]
    end

    {
      :input_count => input_count,
      :output_count => @output_count
    }
  end

  private

  # True when +candidate+ should replace +incumbent+ as the group's maximum.
  # A missing (nil) value ranks below any present value. Values that both
  # parse as numbers are compared numerically, so "10" beats "9"; anything
  # else falls back to plain String comparison.
  def score_exceeds?(candidate, incumbent)
    return false if candidate.nil?
    return true if incumbent.nil?

    candidate_number = numeric_score(candidate)
    incumbent_number = numeric_score(incumbent)
    if candidate_number && incumbent_number
      candidate_number > incumbent_number
    else
      candidate > incumbent
    end
  end

  # Parses +text+ as a decimal Integer, then as a Float; returns nil when it
  # is neither. Integer is tried first so large whole numbers keep full
  # precision.
  def numeric_score(text)
    Integer(text, 10)
  rescue ArgumentError, TypeError
    begin
      Float(text)
    rescue ArgumentError, TypeError
      nil
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
masticate-0.1.4 lib/masticate/max_rows.rb
masticate-0.1.3 lib/masticate/max_rows.rb
masticate-0.1.1 lib/masticate/max_rows.rb