Sha256: 161bd5ebbabce69b7ad3bf1071ae0fa40c0b4a453a694bb82ba03ef7f7a7ffa2
Contents?: true
Size: 1.46 KB
Versions: 22
Compression:
Stored size: 1.46 KB
Contents
##
# SQUISH A CSV FILE BY COLUMN NAME
#
# Reads a CSV file whose first row is a header and whose first column is a
# sample name, sums the integer cell values of all columns that share the
# same header name, and writes the merged table back out as CSV.
#
module Lederhosen
  class CLI

    desc 'squish',
         'merge cell values (reads) in a csv file by column name (cluster)'

    method_option :csv_file, :type => :string, :required => true
    method_option :output,   :type => :string, :required => false

    # Merge ("squish") the columns of a CSV file that share a column name.
    #
    # Options (read from +options+):
    #   :csv_file - path of the CSV file to read (required).
    #   :output   - path of the CSV file to write; when omitted the result is
    #               written to $stdout.
    #
    # Input layout: the first cell of the header row is ignored, the remaining
    # header cells are column names. In every following row the first cell is
    # the sample name and the remaining cells are converted with +to_i+.
    #
    # Output layout: a header row "-,<sorted unique column names>" followed by
    # one row per sample, in the order samples were first seen in the input.
    #
    # Errors raised while opening either file propagate to the caller.
    def squish
      csv_file    = options[:csv_file]
      output_path = options[:output]

      ohai "squishing #{csv_file} to #{output_path || $stdout}"

      # sample_name -> column name -> total number of reads
      total_by_sample_by_column = Hash.new do |samples, sample|
        samples[sample] = Hash.new { |columns, column| columns[column] = 0 }
      end

      column_names = []

      # Load CSV file, merge counts in columns with the same name
      File.open(csv_file) do |handle|
        header = handle.gets
        # drop(1) always yields an Array, unlike [1..-1] which is nil for an
        # empty header; a nil header means the file itself was empty.
        column_names = header ? header.strip.split(',').drop(1) : []

        handle.each_line do |line|
          fields = line.strip.split(',')
          next if fields.empty? # skip blank lines (e.g. a trailing newline)

          sample = fields.first
          fields.drop(1).zip(column_names) do |reads, column_name|
            total_by_sample_by_column[sample][column_name] += reads.to_i
          end
        end
      end

      # uniq! returns nil when there is nothing to remove, so the bang
      # variants must not be chained; use the non-destructive forms.
      column_names = column_names.uniq.sort

      # Only open (and later close) a file when a path was given; $stdout
      # belongs to the process and must stay open.
      out = output_path ? File.open(output_path, 'w') : $stdout

      begin
        # print the new, squished csv file
        out.puts "-,#{column_names.join(',')}"

        total_by_sample_by_column.each_pair do |sample_id, row|
          counts = column_names.map { |column_name| row[column_name] }
          out.puts [sample_id, *counts].join(',')
        end
      ensure
        out.close if output_path
      end
    end

  end
end
Version data entries
22 entries across 22 versions & 1 rubygems