#!/usr/bin/env ruby # # BioRuby bio-blastxmlparser Plugin # Author:: Pjotr Prins # License:: MIT License # # Copyright (C) 2010-2014 Pjotr Prins rootpath = File.dirname(File.dirname(__FILE__)) $: << File.join(rootpath,'lib') BLASTXML_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp $stderr.print "BLAST XML Parser "+BLASTXML_VERSION+" Copyright (C) 2014 Pjotr Prins \n\n" USAGE = < 512_000_000 parser_type = :split else parser_type = :nosplit end end n = if parser_type == :nosplit Bio::BlastXMLParser::NokogiriBlastXml.new(File.new(fn)).to_enum else # default Bio::BlastXMLParser::BlastXmlSplitter.new(fn) end chunks = [] chunks_count = 0 NUM_CHUNKS=10_000 process = lambda { |iter2,i| # Process one BLAST iter block if parser_type == :nosplit iter = iter2 else xml = Nokogiri::XML.parse(iter2.join) { | cfg | cfg.noblanks } iter = Bio::BlastXMLParser::NokogiriBlastIterator.new(xml,self,:prefix=>nil) end res = [] line_count = 0 hit_count = 0 iter.each do | hit | hit_count += 1 hit.each do | hsp | do_print = if options.filter eval(options.filter) else true end if do_print line_count += 1 if template res << template.result(binding) elsif options.output_fasta res << ">"+hit.accession+' '+iter.iter_num.to_s+'|'+iter.query_id+' '+hit.hit_id+' '+hit.hit_def+"\n" res << hsp.qseq+"\n" else # Default output if options.fields out = [iter.iter_num,hit_count,hsp.hsp_num] options.fields.each do | f | out << eval(f) end res << out.join("\t")+"\n" else res << [iter.iter_num,iter.query_id,hit_count,hit.hit_id,hsp.hsp_num,hsp.evalue].join("\t")+"\n" end end end end end res } # end process output = lambda { |collection| collection.each do | result | result.each { |line| print line } end } # end output if options.threads == 1 n.each do | iter | process.call(iter,0).each { | line | print line } end else n.each do | iter | chunks << iter chunks_count += 1 if chunks.size > NUM_CHUNKS out = Parallel.map_with_index(chunks, :in_processes => options.threads) { | iter,i | 
process.call(iter,i) } # Output is forked to a separate process too fork do output.call out STDOUT.flush STDOUT.close exit 0 end chunks = [] end end output.call Parallel.map_with_index(chunks, :in_processes => options.threads) { | iter,i | process.call(iter,i) } end end rescue OptionParser::InvalidOption => e $stderr.print e.message end
# NOTE(review): this copy of the script has lost its line breaks, and the text
# between `USAGE = <` and `> 512_000_000` is missing (presumably the usage
# heredoc, the requires, the option parsing and the start of the per-file
# loop -- TODO restore from the upstream source before running or editing).
#
# What the surviving code shows:
# * A banner containing the contents of the VERSION file is written to $stderr.
# * parser_type becomes :split when some value (its left operand is cut off in
#   this copy) exceeds 512_000_000, otherwise :nosplit. :nosplit reads the file
#   through NokogiriBlastXml#to_enum; anything else uses BlastXmlSplitter.
# * `process` turns one BLAST iteration into an array of output strings. For
#   non-:nosplit input the chunk is joined and parsed with Nokogiri first.
#   An HSP is kept when options.filter is unset or eval(options.filter) is
#   truthy, then rendered through `template`, as FASTA (options.output_fasta),
#   or as a tab-separated line (each options.fields entry is eval'ed per HSP).
# * options.filter and options.fields are passed to eval, so they run as Ruby
#   code -- presumably they come from the command line; verify before exposing
#   this script to untrusted input.
# * With options.threads == 1 iterations are processed and printed in order.
#   Otherwise iterations are queued and, once more than NUM_CHUNKS (10_000)
#   are held, processed with Parallel.map_with_index and printed from a forked
#   child; the visible code never waits for that child. The remaining queue
#   is processed and printed in the parent after the loop.
# * chunks_count and line_count are incremented but not read in the visible code.
# * OptionParser::InvalidOption is rescued and its message printed to $stderr.