Sha256: 70eace204db1f81db182db5007c7eec1428f2682d8604b624f54b4fcf617852e

Contents?: true

Size: 1.37 KB

Versions: 4

Compression:

Stored size: 1.37 KB

Contents

require 'enumerator' 

module Bio
  module BlastXMLParser
    # Reads a full BLAST XML result file and splits it into one buffer per
    # Iteration (query result). Each buffer is an Array of the raw text
    # lines from the opening <Iteration> tag through the matching
    # </Iteration> tag; no XML parsing happens here.
    class BlastXmlSplitter
      # fn:: path of the BLAST XML file to split
      def initialize fn
        @fn = fn
      end

      # Yields every Iteration found in the file as an Array of lines.
      # Without a block an Enumerator over the same buffers is returned.
      #
      # The file is opened in block form so the handle is closed when the
      # iteration finishes, when the caller breaks out early, or when the
      # caller's block raises.
      def each
        return enum_for(:each) unless block_given?

        logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
        logger.info("split file parsing #{@fn}")
        File.open(@fn) do | f |
          # Skip the BLAST header. This consumes the first <Iteration>
          # line, which each_iteration puts back at the head of the first
          # buffer.
          f.each_line do | line |
            break if line.strip == "<Iteration>"
          end
          # Hand each Iteration to the caller as an Array of lines
          each_iteration(f) do | buf |
            yield buf
          end
        end
      end

      private

      # Collects lines from +f+ and yields the collected buffer every time
      # a closing </Iteration> line is seen. Lines that follow the last
      # </Iteration> (the closing tags of the document) are read but never
      # yielded.
      def each_iteration f
        # The header skip in #each already consumed the first opening tag,
        # so the first buffer is seeded with it. Later opening tags are
        # read from the file like any other line.
        b = ["<Iteration>\n"]
        f.each_line do | line |
          b << line
          if line.strip == "</Iteration>"
            yield b
            b = []
          end
        end
      end
    end

    # Wraps a BlastXmlSplitter for a file and exposes its Iterations as an
    # Enumerator of NokogiriBlastIterator objects.
    class XmlSplitterIterator
      # include Enumerable

      # fn:: file name handed straight to BlastXmlSplitter.new
      def initialize fn
        @splitter = BlastXmlSplitter.new(fn)
      end

      # Returns an Enumerator. For every line buffer produced by the
      # splitter, the lines are joined into one string, parsed with
      # Nokogiri (noblanks option set), and the resulting document is
      # wrapped in a NokogiriBlastIterator constructed with this object
      # and :prefix=>nil.
      def to_enum
        Enumerator.new do | out |
          @splitter.each do | lines |
            xml_text = lines.join
            doc = Nokogiri::XML.parse(xml_text) { | options | options.noblanks }
            wrapped = NokogiriBlastIterator.new(doc, self, :prefix => nil)
            out.yield wrapped
          end
        end
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
bio-blastxmlparser-2.0.4 lib/bio/db/blast/xmlsplitter.rb
bio-blastxmlparser-2.0.3 lib/bio/db/blast/xmlsplitter.rb
bio-blastxmlparser-2.0.2 lib/bio/db/blast/xmlsplitter.rb
bio-blastxmlparser-2.0.1 lib/bio/db/blast/xmlsplitter.rb