Sha256: 602d222cf9d0cac760ab0722ed41ca6f4410d260abfdceb6bc7fe1089bbf216b
Contents?: true
Size: 1.27 KB
Versions: 3
Compression:
Stored size: 1.27 KB
Contents
##
# SQUISH A CSV FILE BY COLUMN NAME
#
# Columns (clusters) that share a name are merged by summing their cell
# values (reads) for each sample.
#

module Lederhosen
  class CLI

    desc 'squish', 'merge cell values (reads) in a csv file by column name (cluster)'

    method_option :csv_file, :type => :string, :required => true
    method_option :output,   :type => :string, :required => false

    # Merge the columns of a CSV file that share the same name.
    #
    # Input (options[:csv_file]): the first line is a header whose first cell
    # is ignored and whose remaining cells are column names. On every later
    # line the first cell is the sample name and the remaining cells are
    # converted with #to_i and summed per (sample, column name).
    #
    # Output: a header line "-,<sorted unique column names>" followed by one
    # line per sample. Written to options[:output] when it is given, otherwise
    # to $stdout.
    #
    # Raises whatever File.open raises when the input cannot be read or the
    # output cannot be opened for writing.
    def squish
      csv_file    = options[:csv_file]
      output_path = options[:output]

      # sample name -> column name -> total number of reads
      total_by_sample_by_column = Hash.new do |samples, sample|
        samples[sample] = Hash.new { |columns, column| columns[column] = 0 }
      end

      column_names = []

      # Load CSV file, merge counts in columns with the same name
      File.open(csv_file) do |handle|
        header = handle.gets
        # An empty file has no header line; a header with no cells slices to nil.
        column_names = header.strip.split(',')[1..-1] || [] if header

        handle.each do |line|
          cells = line.strip.split(',')
          next if cells.empty? # blank line (e.g. trailing newline at end of file)

          sample = cells[0]
          # zip pads with nil when a row has more cells than the header; those
          # reads are accumulated under a nil column and never printed.
          cells[1..-1].zip(column_names) do |reads, column_name|
            total_by_sample_by_column[sample][column_name] += reads.to_i
          end
        end
      end

      # uniq!/sort! return nil when nothing changes, so use the non-destructive
      # forms; chaining the bang versions crashes on already-unique headers.
      squished_columns = column_names.uniq.sort

      # The output is opened only after the input has been fully read, so
      # squishing a file in place does not truncate it before it is parsed.
      out = output_path ? File.open(output_path, 'w') : $stdout

      begin
        # print the new, squished csv file
        out.puts "-,#{squished_columns.join(',')}"
        total_by_sample_by_column.each_pair do |sample_id, row|
          counts = squished_columns.map { |column_name| row[column_name] }
          out.puts(([sample_id] + counts).join(','))
        end
      ensure
        # Never close $stdout; only close a file this method opened.
        out.close if output_path
      end
    end

  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
lederhosen-0.1.5 | lib/lederhosen/tasks/squish.rb |
lederhosen-0.1.4 | lib/lederhosen/tasks/squish.rb |
lederhosen-0.1.3 | lib/lederhosen/tasks/squish.rb |