Sha256: 0e2ca827c361372015324e4d844e734ba135634cf74864f0d586902c8fda75d9

Contents?: true

Size: 1.14 KB

Versions: 1

Compression:

Stored size: 1.14 KB

Contents

# keep, for each group of CSV rows, only the row with the maximum value in a given column
require "csv"

# Collapses CSV input to one row per group: for each distinct value found in
# the grouping column, only the row with the highest value in the "max"
# column is kept.
class Masticate::MaxRows < Masticate::Base
  # Plain decimal number: optional sign, optional fraction, optional exponent.
  NUMERIC_SCORE = /\A\s*[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?\s*\z/
  # Subset of NUMERIC_SCORE that can be represented exactly as an Integer.
  INTEGER_SCORE = /\A\s*[-+]?\d+\s*\z/

  # Reads rows via +get+, treats the first parsed row as the header, and emits
  # the header line followed by one winning row per group.
  #
  # Groups are emitted in the order their key was first seen. On a tie the
  # earliest row of the group is kept.
  #
  # @param opts [Hash] options, also handed unchanged to +standard_options+
  # @option opts [String] :by  header name of the column to group on (required)
  # @option opts [String] :max header name of the column to maximise (required)
  # @option opts [Object] :output when truthy, +@output+ is closed before returning
  # @return [Hash] +:input_count+ and +:output_count+ (+@input_count+ is not
  #   set in this method; presumably maintained by the base class)
  # @raise [RuntimeError] if +:by+ or +:max+ is missing, or if either named
  #   column is absent from the header row
  def maxrows(opts)
    standard_options(opts)

    begin
      groupby = opts[:by] or raise "missing field to group by"
      maxon = opts[:max] or raise "missing field to max on"

      @output_count = 0
      headers = nil
      # Declared next to +headers+ so all three share one scope, regardless
      # of how +with_input+ invokes its block.
      index_by = nil
      index_max = nil
      accum = {}
      with_input do |_input|
        while line = get
          # NOTE(review): options are passed positionally; csv 3.x on Ruby 3
          # expects keyword options here -- confirm the targeted Ruby version.
          row = CSV.parse_line(line, csv_options)
          # parse_line returns nil for an empty line; there is nothing to group.
          next unless row

          if !headers
            headers = row
            index_by = headers.index(groupby) or raise "Unable to find column '#{groupby}'"
            index_max = headers.index(maxon) or raise "Unable to find column '#{maxon}'"
            emit(line)
          else
            key = row[index_by]
            best = accum[key]
            if best.nil? || higher_score?(row[index_max], best[index_max])
              accum[key] = row
            end
          end
        end
      end

      # NOTE(review): rows are re-serialised with default CSV formatting while
      # the header line above is echoed verbatim -- confirm this is intended
      # when csv_options selects a non-default separator.
      accum.each_value { |row| emit(row.to_csv) }
    ensure
      # Close the output even when parsing or validation raised.
      @output.close if opts[:output] && @output
    end

    {
      :input_count => @input_count,
      :output_count => @output_count
    }
  end

  private

  # Decides whether +candidate+ should replace +current+ as a group's maximum.
  # A missing (nil) value ranks below any present value. Two numeric values
  # are compared as numbers (so "10" beats "9"); anything else falls back to
  # string ordering, which keeps e.g. ISO-formatted dates working.
  def higher_score?(candidate, current)
    return false if candidate.nil?
    return true if current.nil?

    candidate_number = numeric_score(candidate)
    current_number = numeric_score(current)
    if candidate_number && current_number
      candidate_number > current_number
    else
      candidate.to_s > current.to_s
    end
  end

  # Returns +value+ as a number when it is one (or is a plain decimal string),
  # otherwise nil.
  def numeric_score(value)
    return value if value.is_a?(Numeric)
    return nil unless value.is_a?(String) && value =~ NUMERIC_SCORE

    value =~ INTEGER_SCORE ? value.to_i : value.to_f
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
masticate-0.1.5 lib/masticate/max_rows.rb