#
# = bio/appl/phylip/alignment.rb - phylip multiple alignment format parser
#
# Copyright:: Copyright (C) 2006
#             GOTO Naohisa
#
# License:: The Ruby License
#
# $Id: alignment.rb,v 1.2 2007/04/05 23:35:40 trevor Exp $
#
# = About Bio::Phylip::PhylipFormat
#
# Please refer to the documentation of the Bio::Phylip::PhylipFormat class.
#

module Bio
  module Phylip

    # This is a phylip multiple alignment format parser.
    # The two formats, interleaved and non-interleaved, are
    # automatically determined.
    #
    class PhylipFormat

      # Creates a new object from a string.
      #
      # The string is stripped and split into lines (CR, LF and CRLF line
      # endings are all accepted). The first line is taken as the header and
      # its first two whitespace-separated fields are read as the number of
      # sequences and the alignment length.
      #
      # ---
      # *Arguments*:
      # * (required) _str_: String containing the whole alignment text
      def initialize(str)
        @data = str.strip.split(/(?:\r\n|\r|\n)/)
        @first_line = @data.shift
        @number_of_sequences, @alignment_length =
          @first_line.to_s.strip.split(/\s+/).map(&:to_i)
      end

      # number of sequences
      attr_reader :number_of_sequences

      # alignment length
      attr_reader :alignment_length

      # If the alignment format is "interleaved", returns true.
      # If not, returns false.
      #
      # The decision is made from the second data line only: when that line
      # begins with a space it is treated as a continuation line of the first
      # sequence, i.e. the format is non-interleaved.
      # The format may be misdetected if the alignment is very short.
      def interleaved?
        # defined? (rather than ||=) is used so that a cached false
        # is not recomputed.
        unless defined? @interleaved_flag
          @interleaved_flag = !@data[1].to_s.start_with?(' ')
        end
        @interleaved_flag
      end

      # Gets the alignment. Returns a Bio::Alignment object.
      #
      # The text is parsed on the first call and the resulting object is
      # cached for later calls.
      def alignment
        unless defined? @alignment
          do_parse
          a = Bio::Alignment.new
          @number_of_sequences.times do |i|
            a.add_seq(@sequences[i], @sequence_names[i])
          end
          @alignment = a
        end
        @alignment
      end

      private

      # Parses the data lines with the parser matching the detected format.
      def do_parse
        if interleaved?
          do_parse_interleaved
        else
          do_parse_noninterleaved
        end
      end

      # Allocates @sequence_names and @sequences with one slot per
      # declared sequence.
      def prepare_buffers
        @sequence_names = Array.new(@number_of_sequences) { '' }
        @sequences = Array.new(@number_of_sequences) do
          ' ' * @alignment_length
        end
      end

      # Parses interleaved data.
      #
      # The first @number_of_sequences lines each hold a name followed by
      # the first chunk of that sequence. Every following non-blank line is
      # appended to the sequences in round-robin order; a blank line
      # restarts the round at the first sequence.
      def do_parse_interleaved
        # Remove the first block from @data so that only continuation
        # lines remain.
        first_block = @data.shift(@number_of_sequences)
        prepare_buffers
        first_block.each_with_index do |line, idx|
          name, residues = line.split(/ +/, 2)
          @sequence_names[idx] = name
          @sequences[idx].replace(residues.gsub(/\s+/, ''))
        end
        i = 0
        @data.each do |line|
          if line.strip.empty?
            i = 0
          else
            @sequences[i] << line.gsub(/\s+/, '')
            i = (i + 1) % @number_of_sequences
          end
        end
        @data.clear
        true
      end

      # Parses non-interleaved (sequential) data.
      #
      # Blank lines are skipped. A line starts a new sequence when no
      # sequence has been started yet, when the current sequence is already
      # longer than the alignment length, or when the line does not begin
      # with whitespace. Any other line is appended to the current sequence.
      def do_parse_noninterleaved
        prepare_buffers
        curseq = nil
        i = 0
        @data.each do |line|
          next if line.strip.empty?
          if !curseq || curseq.length > @alignment_length || /^\s/ !~ line
            name, residues = line.strip.split(/ +/, 2)
            @sequence_names[i] = name
            curseq = @sequences[i]
            curseq.replace(residues.gsub(/\s+/, ''))
            i += 1
          else
            curseq << line.gsub(/\s+/, '')
          end
        end
        @data.clear
        true
      end

    end #class PhylipFormat
  end #module Phylip
end #module Bio