Sha256: 0e2ca827c361372015324e4d844e734ba135634cf74864f0d586902c8fda75d9
Contents?: true
Size: 1.14 KB
Versions: 1
Compression:
Stored size: 1.14 KB
Contents
# For each distinct value of a grouping column, keep only the row whose
# "max" column compares greatest.
require "csv"

class Masticate::MaxRows < Masticate::Base
  # Reads delimited lines from the input, emits the header line unchanged,
  # then emits one row per distinct value of the +:by+ column: the row whose
  # +:max+ column compared greatest. Groups are emitted in the order their
  # key was first seen; on a tie the earliest row is kept.
  #
  # Scores are compared exactly as CSV.parse_line returns them. Without CSV
  # converters these are Strings, so the comparison is lexicographic
  # ("9" > "10"). NOTE(review): confirm that is the intended ordering.
  # An empty (nil) score ranks below any present score.
  #
  # opts - Hash of options, also handed to standard_options:
  #        :by     - header name of the column to group on (required)
  #        :max    - header name of the column to maximise (required)
  #        :output - when truthy, @output is closed before returning
  #
  # Returns a Hash with :input_count and :output_count.
  # Raises RuntimeError when :by or :max is missing from opts, or when the
  # named column does not appear in the header row.
  def maxrows(opts)
    standard_options(opts)

    begin
      groupby = opts[:by] || raise("missing field to group by")
      maxon = opts[:max] || raise("missing field to max on")

      @output_count = 0
      headers = nil
      index_by = nil
      index_max = nil
      accum = {}

      with_input do |_input|
        while (line = get)
          # Options are splatted: CSV takes keyword options, and a positional
          # Hash raises ArgumentError on Ruby 3.
          row = CSV.parse_line(line, **csv_options)

          if headers.nil?
            headers = row
            index_by = headers.index(groupby) || raise("Unable to find column '#{groupby}'")
            index_max = headers.index(maxon) || raise("Unable to find column '#{maxon}'")
            emit(line)
            next
          end

          key = row[index_by]
          best = accum[key]
          accum[key] = row if best.nil? || higher_score?(row[index_max], best[index_max])
        end
      end

      # Serialise with the same CSV options used for parsing so the data rows
      # match the format of the header line echoed above.
      accum.each_value { |best| emit(best.to_csv(**csv_options)) }
    ensure
      # Close the output even when an error aborts processing.
      @output.close if opts[:output]
    end

    {
      :input_count => @input_count,
      :output_count => @output_count
    }
  end

  private

  # True when +candidate+ should replace +current+ as a group's best score.
  # A nil score never wins; any present score beats a nil one.
  def higher_score?(candidate, current)
    return false if candidate.nil?

    current.nil? || candidate > current
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
masticate-0.1.5 | lib/masticate/max_rows.rb |