lib/parse_fasta/sequence.rb in parse_fasta-1.7.2 vs lib/parse_fasta/sequence.rb in parse_fasta-1.8.0

- old
+ new

@@ -18,10 +18,19 @@ # Provide some methods for dealing with common tasks regarding # nucleotide sequences. class Sequence < String + # # Error raised if both T and U are present + # # + # # @note This is NOT checked on every call to Sequence.new + # class AmbiguousSequenceError < StandardError + # def message + # "Sequence is ambiguous -- both T and U present" + # end + # end + # Strips whitespace from the str argument before calling super # # @return [Sequence] A Sequence string # # @example Removes whitespace @@ -127,7 +136,33 @@ base_counts = self.base_counts(count_ambiguous_bases) total_bases = base_counts.values.reduce(:+).to_f base_freqs = base_counts.map { |base, count| [base, count/total_bases] }.flatten Hash[*base_freqs] + end + + # Returns a reverse complement of self + # + # @return [Sequence] a Sequence that is the reverse complement of + # self + # + # @example Handles any IUPAC character and capitalization properly + # Sequence.new("gARKbdctymvhu").rev_comp #=> "adbkraghvMYTc" + # + # @example Leaves non-IUPAC characters + # Sequence.new("cccc--CCCcccga").rev_comp #=> "tcgggGGG--gggg" + # + # @note If Sequence contains non-IUPAC characters, these are not + # complemented + def rev_comp + # if self.match(/T/i) && self.match(/U/i) + # raise Sequence::AmbiguousSequenceError + # end + + # if self.match(/[^ATUGCYRSWKMBDHVN]/i) + # warn "WARNING: Sequence contains non IUPAC characters" + # end + + self.reverse.tr("ATUGCYRSWKMBDHVNatugcyrswkmbdhvn", + "TAACGRYSWMKVHDBNtaacgryswmkvhdbn") end end