Sha256: 12e3508dec7c33483e94b0b67cfb5ed281917c6e86f726489867fbb05123c33a

Contents?: true

Size: 1.66 KB

Versions: 1

Compression:

Stored size: 1.66 KB

Contents

require 'set'

module Lederhosen
  class CLI

    desc 'otu_filter', 'works like uc_filter but uses an OTU table as input'

    method_option :input,   :type => :string,  :required => true
    method_option :output,  :type => :string,  :required => true
    method_option :reads,   :type => :numeric, :required => true
    method_option :samples, :type => :numeric, :required => true

    # Filter the clusters (columns) of a comma-separated OTU table.
    #
    # The input is read as: a header line whose first field is a label for the
    # sample column and whose remaining fields are cluster ids, followed by one
    # line per sample (sample id, then one integer count per cluster).
    #
    # A cluster is kept when at least `--samples` samples have a count of at
    # least `--reads` for it. The kept clusters are written to `--output` in
    # the same layout as the input, and summary statistics are reported
    # through `ohai` (defined elsewhere in the project).
    #
    # Options (read from `options`):
    #   :input   - path of the OTU table to read
    #   :output  - path the filtered OTU table is written to
    #   :reads   - minimum count for a sample to support a cluster
    #   :samples - minimum number of supporting samples to keep a cluster
    def otu_filter
      input       = options[:input]
      output      = options[:output]
      reads       = options[:reads]
      min_samples = options[:samples]

      ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples}), saving to #{output}"

      # cluster id -> { sample id -> count }
      cluster_sample_count = Hash.new { |h, k| h[k] = Hash.new }
      sample_ids   = [] # remembered so the output keeps the input row order
      corner_label = ''

      # slurp up CSV file
      File.open input do |handle|
        # an empty file yields nil from gets; treat it as an empty header
        header       = (handle.gets || '').strip.split(',')
        corner_label = header.first.to_s
        cluster_ids  = header.drop(1)

        handle.each do |line|
          fields = line.strip.split(',')
          next if fields.empty? # tolerate blank / trailing lines

          sample_id = fields[0]
          counts    = fields[1..-1].map(&:to_i)
          sample_ids << sample_id

          cluster_ids.zip(counts).each do |cluster, count|
            cluster_sample_count[cluster][sample_id] = count
          end
        end
      end

      # drop clusters that are not supported by enough samples
      filtered = cluster_sample_count.reject do |_cluster, sample_counts|
        sample_counts.values.count { |count| count >= reads } < min_samples
      end

      # save the filtered table using the same layout as the input
      kept_clusters = filtered.keys
      File.open(output, 'w') do |out|
        out.puts(([corner_label] + kept_clusters).join(','))
        sample_ids.uniq.each do |sample_id|
          row = kept_clusters.map { |cluster| filtered[cluster][sample_id] }
          out.puts(([sample_id] + row).join(','))
        end
      end

      ohai "kept #{filtered.keys.size} clusters (#{filtered.keys.size/cluster_sample_count.size.to_f})."

      # seed the sums with 0 so an empty selection reports 0 instead of
      # raising on nil
      kept_reads  = filtered.values.map { |x| x.values.inject(0, :+) }.inject(0, :+)
      total_reads = cluster_sample_count.values.map { |x| x.values.inject(0, :+) }.inject(0, :+)
      ohai "kept #{kept_reads}/#{total_reads} reads (#{kept_reads/total_reads.to_f})."
    end

  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
lederhosen-0.3.6 lib/lederhosen/tasks/otu_filter.rb