Sha256: 209854eb3e2e870d4ee6ecfc6bd0686133c17a4a5bee4c26055f05e6d1bf2f8b

Contents?: true

Size: 1.87 KB

Versions: 2

Compression:

Stored size: 1.87 KB

Contents

##
# MAKE TABLES
#

module Lederhosen
  class CLI

    desc "otu_table",
         "create an OTU abundance matrix from taxonomy count files"

    method_option :files,  :type => :string, :required => true
    method_option :level,  :type => :string, :required => true
    method_option :output, :type => :string, :required => true

    def otu_table
      inputs = Dir[options[:files]]
      level  = options[:level].downcase
      output = options[:output]

      ohai "Generating OTU matrix from #{inputs.size} inputs at #{level} level and saving to #{output}."

      # sanity check
      fail "bad level: #{level}" unless %w{domain phylum class order family genus species kingdom original}.include? level      
      fail 'no inputs matched your glob' if inputs.size == 0

      sample_cluster_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } }

      # create a progress bar with the total number of bytes of
      # the files we're slurping up
      pbar = ProgressBar.new "loading", inputs.size

      inputs.each do |input_file|
        File.open(input_file).each do |line|
          next if line =~ /^#/ # skip header(s)
          line = line.strip.split(',')
          taxonomy, count = line
          count = count.to_i
          tax =
            if taxonomy == 'unclassified_reads'
              'unclassified_reads'
            else
              parse_taxonomy(taxonomy)[level]
            end
          sample_cluster_count[input_file][tax] += count
        end
      end

      all_clusters = sample_cluster_count.values.map(&:keys).flatten.uniq.sort

      out = File.open(output, 'w')
      
      out.puts all_clusters.join(',')
      inputs.sort.each do |input|
        out.print "#{input}"
        all_clusters.each do |c|
          out.print ",#{sample_cluster_count[input][c]}"
        end
        out.print "\n"
      end

    end

  end # class CLI
end # module Lederhosen

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
lederhosen-2.0.1 lib/lederhosen/tasks/otu_table.rb
lederhosen-2.0.0 lib/lederhosen/tasks/otu_table.rb