lib/parse_fasta/sequence.rb in parse_fasta-1.7.1 vs lib/parse_fasta/sequence.rb in parse_fasta-1.7.2

- old
+ new

@@ -18,10 +18,20 @@
 # Provide some methods for dealing with common tasks regarding
 # nucleotide sequences.
 class Sequence < String
+  # Strips whitespace from the str argument before calling super
+  #
+  # @return [Sequence] A Sequence string
+  #
+  # @example Removes whitespace
+  #   Sequence.new "AA CC TT" #=> "AACCTT"
+  def initialize(str)
+    super(str.gsub(/ +/, ""))
+  end
+
   # Calculates GC content
   #
   # Calculates GC content by dividing count of G + C divided by count
   # of G + C + T + A + U. If there are both T's and U's in the
   # Sequence, things will get weird, but then again, that wouldn't
@@ -43,11 +53,11 @@
     c = s.count('c')
     g = s.count('g')
     t = s.count('t')
     a = s.count('a')
     u = s.count('u')
-
+
     return 0 if c + g + t + a + u == 0
     return (c + g) / (c + g + t + a + u).to_f
   end
   # Returns a map of base counts
@@ -85,13 +95,13 @@
       counts[:u] = u
     elsif t > 0 && u > 0
       warn('ERROR: A sequence contains both T and U')
       counts[:t], counts[:u] = t, u
     end
-
+
     counts[:n] = s.count('n') if count_ambiguous_bases
-
+
     counts
   end
   # Returns a map of base frequencies
   #
@@ -114,10 +124,10 @@
   #
   # @return [Hash] A hash with base as key, frequency as value
   def base_frequencies(count_ambiguous_bases=nil)
     base_counts = self.base_counts(count_ambiguous_bases)
     total_bases = base_counts.values.reduce(:+).to_f
-    base_freqs =
+    base_freqs =
       base_counts.map { |base, count| [base, count/total_bases] }.flatten
     Hash[*base_freqs]
   end
 end