Sha256: fc2ab34c635bab7c1fa4422c3c6b17e10417e1d45d5780f30d32a4c223eb04a7
Contents?: true
Size: 1.93 KB
Versions: 6
Compression:
Stored size: 1.93 KB
Contents
## # FILTER READS WITH LOW ABUNDANCE KMERS # module Lederhosen class CLI desc "k_filter", "filter novel reads likely to form small/singleton clusters (experimental)" method_option :input, :type => :string, :required => true method_option :output, :type => :string, :required => true method_option :k, :type => :numeric, :required => true method_option :cutoff, :type => :numeric, :required => true def k_filter input = options[:input] output = options[:output] k_len = options[:k].to_i cutoff = options[:cutoff] counting_table = Hash.new { |h, k| h[k] = 0 } total_reads = 0 ohai "counting kmers" File.open(input) do |handle| records = Dna.new handle records.each do |r| total_reads += 1 kmers = r.sequence.to_kmers(k_len) kmers.each { |x| counting_table[x] += 1 } end end sum_of_kmers = counting_table.values.inject(:+) ohai "total reads = #{total_reads}" ohai "sum of kmers = #{sum_of_kmers}" kept = 0 total_reads = total_reads.to_f pbar = ProgressBar.new "saving", total_reads.to_i output = File.open(output, 'w') File.open(input) do |handle| records = Dna.new handle records.each do |r| kmers = r.sequence.to_kmers(k_len) # check if any of the kmers are rare keep = true coverage = 0 kmers.each do |kmer| # if any of the kmers are rare, don't print the read c = counting_table[kmer] coverage += c if c < cutoff keep = false break end end if keep kept += 1 output.puts r end pbar.inc end end pbar.finish ohai "survivors = #{kept} (#{kept/total_reads.to_f})" output.close end end end
Version data entries
6 entries across 6 versions & 1 rubygems