Sha256: 6d5ecf3fe574933e53a83d521ebf4a0f98557f4ebb44f109822b14d94defa271
Contents?: true
Size: 1.32 KB
Versions: 3
Compression:
Stored size: 1.32 KB
Contents
# Keep one row per group from a CSV stream: for every distinct value of the
# grouping column, retain the row whose "max" column compares highest.
require "csv"

class Masticate::MaxRows < Masticate::Base
  # Echoes the header line, then emits one row per distinct value of the
  # +:by+ column, in order of each value's first appearance.
  #
  # Options read from +opts+:
  #   :output     - path of the file to write; $stdout is used when absent
  #   :col_sep    - column separator used for parsing and for emitted rows
  #   :quote_char - quote character used for parsing (defaults to "\0")
  #   :by         - header name of the grouping column (required)
  #   :max        - header name of the column to maximize (required)
  #
  # Values of the +:max+ column are compared as strings, so ordering is
  # lexicographic ("9" ranks above "10"). On ties the earlier row is kept.
  # Blank lines are skipped, and a row with no value in the +:max+ column
  # never displaces a row that has one.
  #
  # Returns a hash with +:input_count+ and +:output_count+.
  # Raises RuntimeError when +:by+ or +:max+ is missing from +opts+ or is
  # not present in the header row.
  def maxrows(opts)
    # Validate before opening the output so a usage error does not truncate it.
    groupby = opts[:by] || raise("missing field to group by")
    maxon = opts[:max] || raise("missing field to max on")

    csv_options = {}
    csv_options[:col_sep] = opts[:col_sep] if opts[:col_sep]
    csv_options[:quote_char] = opts[:quote_char] || "\0"

    # The header line is echoed verbatim, so data rows must be re-serialized
    # with the same separator to keep the output consistent.
    out_options = {}
    out_options[:col_sep] = opts[:col_sep] if opts[:col_sep]

    @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
    @output_count = 0

    begin
      headers = nil
      index_by = nil
      index_max = nil
      accum = {}

      with_input do |_input|
        while (line = get)
          # Options must be passed as keywords: Ruby 3 no longer converts a
          # trailing positional hash into keyword arguments.
          row = CSV.parse_line(line, **csv_options)
          next unless row # CSV.parse_line returns nil for a blank line

          if headers.nil?
            headers = row
            index_by = headers.index(groupby) || raise("Unable to find column '#{groupby}'")
            index_max = headers.index(maxon) || raise("Unable to find column '#{maxon}'")
            emit(line)
          else
            key = row[index_by]
            best = accum[key]
            accum[key] = row if best.nil? || higher_score?(row[index_max], best[index_max])
          end
        end
      end

      accum.each_value { |row| emit(row.to_csv(**out_options)) }

      { :input_count => input_count, :output_count => @output_count }
    ensure
      # Close the file we opened even when parsing or header lookup raises.
      @output.close if opts[:output]
    end
  end

  private

  # True when +candidate+ should replace +current+ as a group's maximum.
  # A missing (nil) value ranks below any present value.
  def higher_score?(candidate, current)
    return false if candidate.nil?
    return true if current.nil?

    candidate > current
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
masticate-0.1.4 | lib/masticate/max_rows.rb |
masticate-0.1.3 | lib/masticate/max_rows.rb |
masticate-0.1.1 | lib/masticate/max_rows.rb |