lib/parse_fasta/sequence.rb in parse_fasta-1.7.1 vs lib/parse_fasta/sequence.rb in parse_fasta-1.7.2
- old
+ new
@@ -18,10 +18,20 @@
# Provide some methods for dealing with common tasks regarding
# nucleotide sequences.
class Sequence < String
# Builds a Sequence from str with every space character removed.
#
# Only literal spaces are stripped; other whitespace such as tabs
# and newlines is passed through to String#initialize untouched.
#
# @param str [String] the raw sequence text
#
# @return [Sequence] A Sequence string
#
# @example Removes spaces
#   Sequence.new "AA CC TT" #=> "AACCTT"
def initialize(str)
  without_spaces = str.gsub(/ +/, "")
  super(without_spaces)
end
+
# Calculates GC content
#
# Calculates GC content by dividing the count of G + C by the count
# of G + C + T + A + U. If there are both T's and U's in the
# Sequence, things will get weird, but then again, that wouldn't
@@ -43,11 +53,11 @@
c = s.count('c')
g = s.count('g')
t = s.count('t')
a = s.count('a')
u = s.count('u')
-
+
return 0 if c + g + t + a + u == 0
return (c + g) / (c + g + t + a + u).to_f
end
# Returns a map of base counts
@@ -85,13 +95,13 @@
counts[:u] = u
elsif t > 0 && u > 0
warn('ERROR: A sequence contains both T and U')
counts[:t], counts[:u] = t, u
end
-
+
counts[:n] = s.count('n') if count_ambiguous_bases
-
+
counts
end
# Returns a map of base frequencies
#
@@ -114,10 +124,10 @@
#
# @return [Hash] A hash with base as key, frequency as value
def base_frequencies(count_ambiguous_bases=nil)
  counts = base_counts(count_ambiguous_bases)

  # Convert the total to a Float so each count / total below is a
  # floating point division rather than an integer one.
  total = counts.values.reduce(:+).to_f

  # Build the result in the same key order that base_counts returned.
  counts.each_with_object({}) do |(base, count), freqs|
    freqs[base] = count / total
  end
end
end