lib/parse_fasta/sequence.rb in parse_fasta-1.7.2 vs lib/parse_fasta/sequence.rb in parse_fasta-1.8.0
- old
+ new
@@ -18,10 +18,19 @@
# Provide some methods for dealing with common tasks regarding
# nucleotide sequences.
class Sequence < String
+ # # Error raised if both T and U are present
+ # #
+ # # @note This is NOT checked on every call to Sequence.new
+ # class AmbiguousSequenceError < StandardError
+ # def message
+ # "Sequence is ambiguous -- both T and U present"
+ # end
+ # end
+
# Strips whitespace from the str argument before calling super
#
# @return [Sequence] A Sequence string
#
# @example Removes whitespace
@@ -127,7 +136,33 @@
base_counts = self.base_counts(count_ambiguous_bases)
total_bases = base_counts.values.reduce(:+).to_f
base_freqs =
base_counts.map { |base, count| [base, count/total_bases] }.flatten
Hash[*base_freqs]
+ end
+
+ # Returns a reverse complement of self
+ #
+ # @return [Sequence] a Sequence that is the reverse complement of
+ # self
+ #
+ # @example Handles any IUPAC character and capitalization properly
+ # Sequence.new("gARKbdctymvhu").rev_comp #=> "adbkraghvMYTc"
+ #
+ # @example Leaves non IUPAC characters
+ # Sequence.new("cccc--CCCcccga").rev_comp #=> "tcgggGGG--gggg"
+ #
+ # @note If Sequence contains non-IUPAC characters, these are not
+ # complemented
def rev_comp
  # Reverse first, then swap each IUPAC code for its complement. Case
  # is kept, U maps to A, and characters outside the table (gaps and
  # other non-IUPAC symbols) pass through unchanged. No T/U ambiguity
  # check and no non-IUPAC warning is performed here.
  complemented = reverse.tr("ATUGCYRSWKMBDHVNatugcyrswkmbdhvn",
                            "TAACGRYSWMKVHDBNtaacgryswmkvhdbn")

  # String#reverse and String#tr return plain String objects for
  # subclass receivers on Ruby 3.0+. Copying the receiver and swapping
  # in the new contents keeps the receiver's class on every Ruby
  # version without going through the constructor; the copy is also
  # unfrozen, so this works for frozen receivers.
  dup.replace(complemented)
end
end