lib/bio/appl/mafft/report.rb in bio-1.0.0 vs lib/bio/appl/mafft/report.rb in bio-1.1.0

- old
+ new

@@ -1,117 +1,224 @@ # # = bio/appl/mafft/report.rb - MAFFT report class # -# Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp> -# License:: LGPL +# Copyright:: Copyright (C) 2003, 2007 Naohisa Goto <ng@bioruby.org> +# License:: The Ruby License # -#-- -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2 of the License, or (at your option) any later version. +# $Id: report.rb,v 1.13 2007/07/16 12:21:39 ngoto Exp $ # -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#++ -# -# $Id: report.rb,v 1.8 2005/12/18 15:58:40 k Exp $ -# # MAFFT result parser class. # MAFFT is a very fast multiple sequence alignment software. # # Since a result of MAFFT is simply a multiple-fasta format, # the significance of this class is to keep standard form and # interface between Bio::ClustalW::Report. # +# Bio::Alignment::MultiFastaFormat is a generic data class for +# fasta-formatted multiple sequence alignment data. +# Bio::MAFFT::Report inherits Bio::Alignment::MultiFastaFormat. +# # == References # # * K. Katoh, K. Misawa, K. Kuma and T. Miyata. # MAFFT: a novel method for rapid multiple sequence alignment based # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002. 
# http://nar.oupjournals.org/cgi/content/abstract/30/14/3059 # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/ # +require 'stringio' require 'bio/db/fasta' require 'bio/io/flatfile' +require 'bio/alignment' require 'bio/appl/mafft' module Bio + module Alignment + # Data class for fasta-formatted multiple sequence alignment data, + # which is simply multiple entries of fasta formatted sequences. + class MultiFastaFormat + + # delimiter for flatfile + DELIMITER = RS = nil + + # Creates a new data object. + # +str+ should be a (multi-)fasta formatted string. + def initialize(str) + ff = Bio::FlatFile.new(Bio::FastaFormat, StringIO.new(str)) + @data = ff.to_a + @alignment = nil + @seq_method = nil + end + + # Gets a multiple alignment. + # Returns a Bio::Alignment object. + # +method+ should be one of :naseq, :aaseq, :seq, or nil (default). + # nil means to automatically determine nucleotide or amino acid. + # + # This method returns the previously parsed object + # if the same method is given (or guessed method is the same). + def alignment(method = nil) + m = determine_seq_method(@data, method) + if !@alignment or m != @seq_method then + @seq_method = m + @alignment = do_parse(@data, @seq_method) + end + @alignment + end + + # Gets an array of the fasta formatted sequence objects. + # Returns an array of Bio::FastaFormat objects. + def entries + @data + end + + private + # determines seqtype. + # if nil is given, try to guess DNA or protein. + def determine_seq_method(data, m = nil) + case m + when :aaseq + :aaseq + when :naseq + :naseq + when :seq + :seq + when nil + # auto-detection + score = 0 + data[0, 3].each do |e| + k = e.to_seq.guess + if k == Bio::Sequence::NA then + score += 1 + elsif k == Bio::Sequence::AA then + score -= 1 + end + end + if score > 0 then + :naseq + elsif score < 0 then + :aaseq + else + :seq + end + else + raise 'one of :naseq, :aaseq, :seq, or nil should be given' + end + end + + # Parses a result. 
+ def do_parse(ary, seqmethod) + a = Bio::Alignment.new + a.add_sequences(ary) do |x| + [ x.__send__(seqmethod), x.definition ] + end + a + end + end #class MultiFastaFormat + end #module Alignment + class MAFFT # MAFFT result parser class. # MAFFT is a very fast multiple sequence alignment software. # # Since a result of MAFFT is simply a multiple-fasta format, # the significance of this class is to keep standard form and # interface between Bio::ClustalW::Report. - class Report + class Report < Bio::Alignment::MultiFastaFormat # Creates a new Report object. - # +ary+ should be an Array of Bio::FastaFormat. - # +seqclass+ should on of following: + # +str+ should be multi-fasta formatted text as a string. + # + # Compatibility Note: the old usage (to get array of Bio::FastaFormat + # objects) is deprecated. + # + # Compatibility Note 2: the argument +seqclass+ is deprecated. + # + # +seqclass+ should be one of the following: # Class: Bio::Sequence::AA, Bio::Sequence::NA, ... # String: 'PROTEIN', 'DNA', ... - def initialize(ary, seqclass = nil) - @data = ary - @align = nil - case seqclass - when /PROTEIN/i - @seqclass = Bio::Sequence::AA - when /[DR]NA/i - @seqclass = Bio::Sequence::NA + # + def initialize(str, seqclass = nil) + if str.is_a?(Array) then + warn "Array of Bio::FastaFormat objects will be no longer accepted." + @data = str else - if seqclass.is_a?(Module) then - @seqclass = seqclass + super(str) + end + + if seqclass then + warn "the 2nd argument (seqclass) will be no deprecated." + case seqclass + when /PROTEIN/i + @seqclass = Bio::Sequence::AA + when /[DR]NA/i + @seqclass = Bio::Sequence::NA else - @seqclass = Bio::Sequence + if seqclass.is_a?(Module) then + @seqclass = seqclass + else + @seqclass = nil + end end end end # sequence data. Returns an array of Bio::FastaFormat. attr_reader :data # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...) + # + # Compatibility note: This method will be removed in the future. 
attr_reader :seqclass # Gets a multiple alignment. - # Returns an instance of Bio::Alignment class. + # Returns a Bio::Alignment object. + def alignment(method = nil) + super + end + + # This method will be deprecated. Instead, please use alignment. + # + # Gets a multiple alignment. + # Returns a Bio::Alignment object. def align - do_parse() unless @align - @align + warn "Bio::MAFFT::Report#align is deprecated. Please use \'alignment\'." + alignment end - alias alignment align + # This will be deprecated. Instead, please use alignment.output_fasta. + # # Gets a fasta-format string of the sequences. # Returns a string. # Same as align.to_fasta. - # Please refer to Bio::Alignment#to_fasta for arguments. + # Please refer to Bio::Alignment#output_fasta for arguments. def to_fasta(*arg) - align.to_fasta(*arg) + warn "Bio::MAFFT::report#to_fasta is deprecated. Please use \'alignment.output_fasta\'" + alignment.output_fasta(*arg) end + # Compatibility note: Behavior of the method will be changed + # in the future. + # # Gets an array of the sequences. # Returns an array of Bio::FastaFormat instances. def to_a @data end private # Parsing a result. - def do_parse - return nil if @align - @align = Bio::Alignment.new(@data) do |x| - [ @seqclass.new(x.seq), x.definition ] + def do_parse(ary, seqmethod) + if @seqclass then + a = Bio::Alignment.new + a.add_sequences(ary) do |x| + [ @seqclass.new(x.seq), x.definition ] + end + else + super(ary, seqmethod) end - nil end end #class Report end #class MAFFT end #module Bio