Sha256: db29bf652524101a8a8fb845f345787432d43df6d70d4d681d7f81223d6a138a
Contents?: true
Size: 1.93 KB
Versions: 4
Compression:
Stored size: 1.93 KB
Contents
## # FILTER READS WITH LOW ABUNDANCE KMERS # module Lederhosen class CLI desc "k_filter khmer filtering", "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50" method_option :input, :type => :string, :required => true method_option :output, :type => :string, :required => true method_option :k, :type => :numeric, :required => true method_option :cutoff, :type => :numeric, :required => true def k_filter input = options[:input] output = options[:output] k_len = options[:k].to_i cutoff = options[:cutoff] counting_table = Hash.new { |h, k| h[k] = 0 } total_reads = 0 ohai "counting kmers" File.open(input) do |handle| records = Dna.new handle records.each do |r| total_reads += 1 kmers = r.sequence.to_kmers(k_len) kmers.each { |x| counting_table[x] += 1 } end end sum_of_kmers = counting_table.values.inject(:+) ohai "total reads = #{total_reads}" ohai "sum of kmers = #{sum_of_kmers}" kept = 0 total_reads = total_reads.to_f pbar = ProgressBar.new "saving", total_reads.to_i output = File.open(output, 'w') File.open(input) do |handle| records = Dna.new handle records.each do |r| kmers = r.sequence.to_kmers(k_len) # check if any of the kmers are rare keep = true coverage = 0 kmers.each do |kmer| # if any of the kmers are rare, don't print the read c = counting_table[kmer] coverage += c if c < cutoff keep = false break end end if keep kept += 1 output.puts r end pbar.inc end end pbar.finish ohai "survivors = #{kept} (#{kept/total_reads.to_f})" output.close end end end
Version data entries
4 entries across 4 versions & 1 rubygems