Sha256: 1c7fd7c86df89fc6943f20d1feee91bc9c9bc6a160f93c6f3a3b736a73edfdb8
Contents?: true
Size: 1.92 KB
Versions: 2
Compression:
Stored size: 1.92 KB
Contents
## # FILTER READS WITH LOW ABUNDANCE KMERS # module Lederhosen class CLI desc "k_filter", "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50" method_option :input, :type => :string, :required => true method_option :output, :type => :string, :required => true method_option :k, :type => :numeric, :required => true method_option :cutoff, :type => :numeric, :required => true def k_filter input = options[:input] output = options[:output] k_len = options[:k].to_i cutoff = options[:cutoff] counting_table = Hash.new { |h, k| h[k] = 0 } total_reads = 0 ohai "counting kmers" File.open(input) do |handle| records = Dna.new handle records.each do |r| total_reads += 1 kmers = r.sequence.to_kmers(k_len) kmers.each { |x| counting_table[x] += 1 } end end sum_of_kmers = counting_table.values.inject(:+) ohai "total reads = #{total_reads}" ohai "sum of kmers = #{sum_of_kmers}" kept = 0 total_reads = total_reads.to_f pbar = ProgressBar.new "saving", total_reads.to_i output = File.open(output, 'w') File.open(input) do |handle| records = Dna.new handle records.each do |r| kmers = r.sequence.to_kmers(k_len) # check if any of the kmers are rare keep = true coverage = 0 kmers.each do |kmer| # if any of the kmers are rare, don't print the read c = counting_table[kmer] coverage += c if c < cutoff keep = false break end end if keep kept += 1 output.puts r end pbar.inc end end pbar.finish ohai "survivors = #{kept} (#{kept/total_reads.to_f})" output.close end end end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
lederhosen-0.1.4 | lib/lederhosen/tasks/k_filter.rb |
lederhosen-0.1.3 | lib/lederhosen/tasks/k_filter.rb |