Sha256: 2673834233ae41d5ac4c576309a833fe75bb6490de08aef682d34d90a84181f4

Contents?: true

Size: 1.72 KB

Versions: 5

Compression:

Stored size: 1.72 KB

Contents

module Bio
  class Sequence
    class NA
      # Choose between this sequence and its reverse complement, returning
      # whichever has the string form that sorts first in lexicographical
      # (alphabetical) order. When both string forms are equal, the reverse
      # complement object is the one returned.
      def lowest_lexigraphical_form
        complement = reverse_complement
        return self if to_s < complement.to_s

        complement
      end
    end

    class Kmer
      # Return a hash mapping every possible kmer of length k (as a String)
      # to 0. For instance
      # empty_full_kmer_hash(1) => {'A'=>0, 'T'=>0, 'C'=>0, 'G'=>0}
      #
      # Keys are generated in A, T, C, G order, with the first position
      # varying slowest. A k of 1 or less yields the four 1-mers.
      #
      # The table for each k is built once and remembered; every call hands
      # back a fresh copy, so callers may mutate the result freely.
      def self.empty_full_kmer_hash(k=4)
        # Cache is keyed by k so that a request for one length can never be
        # answered with the table built for another.
        @empty_full_kmer_hashes ||= {}
        template = (@empty_full_kmer_hashes[k] ||= build_empty_kmer_hash(k))
        template.dup
      end

      # Build the zero-filled hash of all kmers of length k (no caching).
      def self.build_empty_kmer_hash(k)
        nucleotides = %w(A T C G)
        kmers = nucleotides

        # Each pass extends every kmer built so far by one nucleotide.
        (k - 1).times do
          kmers = kmers.flat_map do |prefix|
            nucleotides.map { |nucleotide| "#{prefix}#{nucleotide}" }
          end
        end

        kmers.each_with_object({}) { |kmer, counts| counts[kmer] = 0 }
      end
      private_class_method :build_empty_kmer_hash

      # Take a kmer hash, and merge those keys to the lowest lexicographical
      # form (see Bio::Sequence::NA#lowest_lexigraphical_form for what this
      # means). When 2 keys are reverse complements they get merged into one
      # hash entry, where the key is the lowest_lexigraphical_form of the two
      # (upper-cased) and the value is the sum of the original 2 values.
      #
      # For instance {'A'=>2,'T'=>5} #=> {'A'=>7}
      #
      # The input hash is not modified; a new plain Hash is returned.
      def self.merge_down_to_lowest_lexigraphical_form(hash)
        hash.each_with_object({}) do |(kmer, count), merged|
          canonical = Bio::Sequence::NA.new(kmer).lowest_lexigraphical_form.to_s.upcase
          merged[canonical] = merged.fetch(canonical, 0) + count
        end
      end
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
bio-kmer_counter-0.1.2 lib/bio-kmer_counter/kmer_counter.rb
bio-kmer_counter-0.1.1 lib/bio-kmer_counter/kmer_counter.rb
bio-kmer_counter-0.1.0 lib/bio-kmer_counter/kmer_counter.rb
bio-kmer_counter-0.0.4 lib/bio-kmer_counter/kmer_counter.rb
bio-kmer_counter-0.0.3 lib/bio-kmer_counter/kmer_counter.rb