Sha256: 4471d7b1c211b6d9b051efc744ed7a424e35427c32f936d5e1de222e644461d7
Contents?: true
Size: 1.42 KB
Versions: 13
Compression:
Stored size: 1.42 KB
Contents
##
# SQUISH A CSV FILE BY COLUMN NAME
#

module Lederhosen
  class CLI

    desc 'squish', 'merge cell values (reads) in a csv file by column name (cluster)'

    method_option :csv_file, :type => :string, :required => true
    method_option :output,   :type => :string, :required => false

    # Merge the columns of a CSV file that share the same header name by
    # summing their cell values per row, then print the squished table.
    #
    # Input layout: the first line is a header whose first cell is ignored
    # and whose remaining cells are column names; every following line is
    # "<sample>,<reads>,<reads>,...". Cells are converted with +to_i+.
    #
    # Options read:
    #   :csv_file - path of the CSV file to read (required).
    #   :output   - path of the file to write; when absent, or when the
    #               file cannot be opened, the table is written to $stdout.
    #
    # Output layout: a header "-,<sorted unique column names>" followed by
    # one line per sample, in the order samples were first seen.
    def squish
      csv_file    = options[:csv_file]
      output_path = options[:output]

      # sample_name -> column name -> total number of reads
      total_by_sample_by_column =
        Hash.new { |samples, sample| samples[sample] = Hash.new(0) }

      column_names = []

      # Load CSV file, merge counts in columns with the same name
      File.open(csv_file) do |handle|
        header = handle.gets
        # An empty file has no header; a header without data columns
        # yields nil from the slice. Both mean "no columns".
        if header
          column_names = header.strip.split(',')[1..-1] || []
        end

        handle.each do |line|
          sample, *cells = line.strip.split(',')
          next if sample.nil? # blank line, nothing to accumulate

          cells.zip(column_names) do |reads, column_name|
            total_by_sample_by_column[sample][column_name] += reads.to_i
          end
        end
      end

      # Only open a file when a path was actually given. Fall back to
      # stdout if it cannot be opened, but say so instead of hiding it.
      out = $stdout
      if output_path
        begin
          out = File.open(output_path, 'w')
        rescue SystemCallError => e
          $stderr.puts "squish: cannot open #{output_path} (#{e.message}); writing to stdout"
        end
      end

      begin
        # print the new, squished csv file
        # (non-bang uniq: uniq! returns nil when there are no duplicates)
        column_names = column_names.uniq.sort
        out.puts "-,#{column_names.join(',')}"

        total_by_sample_by_column.each_pair do |sample_id, row|
          totals = column_names.map { |column_name| row[column_name] }
          out.puts [sample_id, *totals].join(',')
        end
      ensure
        # Never close the process-wide stdout; only a file we opened.
        out.close unless out.equal?($stdout)
      end
    end

  end
end
Version data entries
13 entries across 13 versions & 1 rubygems