#
# = bio/appl/blast/report.rb - BLAST Report class
#
# Copyright::  Copyright (C) 2003 Toshiaki Katayama
# License::    The Ruby License
#

require 'bio/io/flatfile'

module Bio

  require 'bio/appl/blast' unless const_defined?(:Blast)

  class Blast

    # = Bio::Blast::Report
    #
    # Parsed results of the blast execution for Tab-delimited and XML output
    # format.  Tab-delimited reports consist of
    #
    #   Query id,
    #   Subject id,
    #   percent of identity,
    #   alignment length,
    #   number of mismatches (not including gaps),
    #   number of gap openings,
    #   start of alignment in query,
    #   end of alignment in query,
    #   start of alignment in subject,
    #   end of alignment in subject,
    #   expected value,
    #   bit score.
    #
    # according to the MEGABLAST document (README.mbl). As for XML output,
    # see the following DTDs.
    #
    # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd
    # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod
    # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_Entity.mod
    #
    class Report

      #--
      # require lines moved here to avoid circular require
      #++
      require 'bio/appl/blast/xmlparser'
      require 'bio/appl/blast/rexml'
      require 'bio/appl/blast/format8'

      # for Bio::FlatFile support (only for XML data)
      # NOTE(review): other markup-like text in this file was lost during
      # extraction; this literal may originally have contained a closing XML
      # tag before the newline -- confirm against version control.
      DELIMITER = RS = "\n"

      # Specify to use XMLParser to parse XML (-m 7) output.
      def self.xmlparser(data)
        self.new(data, :xmlparser)
      end

      # Specify to use REXML to parse XML (-m 7) output.
      def self.rexml(data)
        self.new(data, :rexml)
      end

      # Specify to use tab delimited output parser.
      def self.tab(data)
        self.new(data, :tab)
      end

      # NOTE(review): the text between the `def auto_parse(data)` header and
      # the doc comment of #blastxml_split_reports is missing from the copy
      # under review.  The methods below rely on names that are not defined
      # in the visible text (the two-argument constructor, #iterations,
      # #query_id, #program, #parameters, Iteration, ...), so they were
      # presumably declared in the lost section.  The surviving fragment is
      # kept verbatim here; restore the section before loading this file.
      #
      #   def auto_parse(data) if /= 2.2.14), results of multiple queries
      #   are stored in . This method splits iterations into
      #   multiple Bio::Blast objects and returns them as an array.

      # Splits this report into one report object per iteration.
      #
      # Returns nil when none of the iterations carries query_id, query_def
      # or query_len.  Otherwise returns an Array of report objects built
      # with <tt>self.class.new(nil, false)</tt>:
      #
      # * one report for every iteration of this object, and
      # * one "empty" report for every iteration number that is missing
      #   between two consecutive iterations.
      #
      # program, version, reference, db and parameters are copied from this
      # object into every report.  Only the very first report created also
      # starts from this object's query_id, query_def and query_len.
      #
      # Side effects: the query_id, query_def and query_len of each hit are
      # overwritten with the values of the report that receives it, and this
      # object's @iterations is replaced by the iterations of the first
      # report.
      def blastxml_split_reports
        unless self.iterations.find { |iter|
            iter.query_id || iter.query_def || iter.query_len
          } then
          # traditional BLAST XML format, or blastpgp result.
          return nil
        end

        # new BLAST XML format (blastall 2.2.14 or later)
        origin = self
        reports = []
        prev_iternum = 0
        # true until the first report (empty or not) has been initialized
        firsttime = true

        orig_iters = self.iterations
        orig_iters.each do |iter|
          blast = self.class.new(nil, false)
          # When no hits found, the iteration is skipped in NCBI BLAST XML.
          # So, filled with empty report object.
          if prev_iternum + 1 < iter.num then
            ((prev_iternum + 1)...(iter.num)).each do |num|
              empty_i = Iteration.new
              empty_i.num = num
              empty_i.instance_eval {
                if firsttime then
                  @query_id = origin.query_id
                  @query_def = origin.query_def
                  @query_len = origin.query_len
                  firsttime = false
                end
              }
              empty = self.class.new(nil, false)
              empty.instance_eval {
                # query_* are copied from the empty_i
                @query_id = empty_i.query_id
                @query_def = empty_i.query_def
                @query_len = empty_i.query_len
                # others are copied from the origin
                @program = origin.program
                @version = origin.version
                @reference = origin.reference
                @db = origin.db
                @parameters.update(origin.parameters)
                # the empty_i is added to the iterations
                @iterations.push empty_i
              }
              reports.push empty
            end
          end

          blast.instance_eval {
            if firsttime then
              @query_id = origin.query_id
              @query_def = origin.query_def
              @query_len = origin.query_len
              firsttime = false
            end
            # query_* are copied from the iter
            @query_id = iter.query_id if iter.query_id
            @query_def = iter.query_def if iter.query_def
            @query_len = iter.query_len if iter.query_len
            # others are copied from the origin
            @program = origin.program
            @version = origin.version
            @reference = origin.reference
            @db = origin.db
            @parameters.update(origin.parameters)
            # rewrites hit's query_id, query_def, query_len
            iter.hits.each do |h|
              h.query_id = @query_id
              h.query_def = @query_def
              h.query_len = @query_len
            end
            # the iter is added to the iterations
            @iterations.push iter
          }
          prev_iternum = iter.num
          reports.push blast
        end #orig_iters.each

        # This object's iterations is set as first report's iterations
        @iterations.clear
        if rep = reports.first then
          @iterations = rep.iterations
        end

        return reports
      end

      # Flatfile splitter for NCBI BLAST XML format.
      # It is internally used when reading BLAST XML.
      # Normally, users do not need to use it directly.
      #
      # One raw entry may expand into several parsed report objects.  The
      # extra objects are queued in @parsed_entries and handed out by
      # #get_parsed_entry; while such a queued object is the current entry,
      # the raw-text accessors raise because no matching raw string exists.
      class BlastXmlSplitter < Bio::FlatFile::Splitter::Default

        # creates a new splitter object
        def initialize(klass, bstream)
          super(klass, bstream)
          @parsed_entries = []      # parsed reports not yet handed out
          @raw_unsupported = false  # true while raw entry access must raise
        end

        # rewinds, and discards queued parsed entries
        def rewind
          ret = super
          @parsed_entries.clear
          @raw_unsupported = false
          ret
        end

        # do nothing
        def skip_leader
          nil
        end

        # get an entry and return the entry as a string
        #
        # Raises RuntimeError when parsed entries from the previous raw
        # entry are still queued.
        def get_entry
          if @parsed_entries.empty? then
            @raw_unsupported = false
            ent = super
            prepare_parsed_entries(ent)
            self.parsed_entry = @parsed_entries.shift
          else
            raise 'not supported for new BLAST XML format'
          end
          ent
        end

        # get an entry as a Bio::Blast::Report object
        def get_parsed_entry
          if @parsed_entries.empty? then
            get_entry
          else
            # serve a queued report; there is no raw string for it
            self.parsed_entry = @parsed_entries.shift
            self.entry = nil
            @raw_unsupported = true
          end
          self.parsed_entry
        end

        # current raw entry as a String
        def entry
          raise 'not supported for new BLAST XML format' if @raw_unsupported
          super
        end

        # start position of the entry
        def entry_start_pos
          if entry_pos_flag then
            raise 'not supported for new BLAST XML format' if @raw_unsupported
          end
          super
        end

        # (end position of the entry) + 1
        def entry_ended_pos
          if entry_pos_flag then
            raise 'not supported for new BLAST XML format' if @raw_unsupported
          end
          super
        end

        private

        # (private method) to prepare parsed entry
        #
        # Parses +ent+ with dbclass and queues either the reports it
        # contains or, when it has none, the parsed object itself.
        # Does nothing when +ent+ is nil or false.
        def prepare_parsed_entries(ent)
          if ent then
            blast = dbclass.new(ent)
            if blast.reports and blast.reports.size >= 1 then
              # new blast xml using for multiple queries
              @parsed_entries.concat blast.reports
            else
              # traditional blast xml
              @parsed_entries.push blast
            end
          end
        end

      end #class BlastXmlSplitter

      # splitter for Bio::FlatFile support
      FLATFILE_SPLITTER = BlastXmlSplitter

    end # Report

    # NCBI BLAST tabular (-m 8) output parser.
    # All methods are equal to Bio::Blast::Report.
    # Only DELIMITER (and RS) is different.
    #
    class Report_tab < Report
      # Delimiter of each entry. Bio::FlatFile uses it.
      DELIMITER = RS = nil
    end #class Report_tabular

  end # Blast
end # Bio


#if __FILE__ == $0
=begin

  begin                 # p is suitable than pp for the following test script
    require 'pp'
    alias p pp
  rescue
  end

  # for multiple xml reports (iterates on each Blast::Report)
  Bio::Blast.reports(ARGF) do |rep|
    rep.iterations.each do |itr|
      itr.hits.each do |hit|
        hit.hsps.each do |hsp|
        end
      end
    end
  end

  # for multiple xml reports (returns Array of Blast::Report)
  reps = Bio::Blast.reports(ARGF.read)

  # for a single report (xml or tab) format auto detect, parser auto selected
  rep = Bio::Blast::Report.new(ARGF.read)

  # to use xmlparser explicitly for a report
  rep = Bio::Blast::Report.xmlparser(ARGF.read)

  # to use rexml explicitly for a report
  rep = Bio::Blast::Report.rexml(ARGF.read)

  # to use a tab delimited report
  rep = Bio::Blast::Report.tab(ARGF.read)

=end
#end