Sha256: db6dd5d32c9764652dc672b409e330665161f987f4ae31248bfaaebafc1c7938

Contents?: true

Size: 1.19 KB

Versions: 3

Compression:

Stored size: 1.19 KB

Contents

require 'enumerator' 

module Bio
  module BlastXMLParser
    # Reads a full BLAST XML result file and splits it into one buffer per
    # <Iteration> element (query result), so that every iteration can be
    # parsed into its own DOM instead of loading the whole document at once.
    class XmlSplitterIterator
      # include Enumerable

      # fn:: path of the BLAST XML file to split
      def initialize(fn)
        @fn = fn
      end

      # Returns an Enumerator that yields one NokogiriBlastIterator per
      # <Iteration> block found in the file.
      #
      # The file is opened when enumeration starts, not when this method is
      # called. It is opened with the block form of File.open so the handle
      # is closed when enumeration completes, when the consumer stops early
      # (e.g. +first+ or +break+), or when parsing raises.
      def to_enum
        Enumerator.new do |yielder|
          logger = Bio::Log::LoggerPlus['bio-blastxmlparser']
          logger.info("split file parsing #{@fn}")
          File.open(@fn) do |f|
            # Skip BLAST header: consume lines up to and including the first
            # opening <Iteration> tag (each_iteration re-adds that tag).
            f.each_line do |line|
              break if line.strip == "<Iteration>"
            end
            # Return each Iteration as an XML DOM
            each_iteration(f) do |buf|
              iteration = Nokogiri::XML.parse(buf.join) { |cfg| cfg.noblanks }
              yielder.yield NokogiriBlastIterator.new(iteration, self, :prefix => nil)
            end
          end
        end
      end

    private

      # Yields an Array of lines for every iteration read from +f+.
      #
      # f:: an IO-like object positioned just after the first opening
      #     <Iteration> tag
      #
      # A buffer is yielded each time a line whose stripped content is
      # "</Iteration>" is read. Lines after the last closing tag are
      # collected but never yielded.
      def each_iteration(f)
        # The header skip in to_enum has already consumed the first opening
        # tag, so the first buffer is seeded with it. Later buffers start
        # empty because their opening tag is read from the file itself.
        b = ["<Iteration>\n"]
        f.each_line do |line|
          b << line
          if line.strip == "</Iteration>"
            yield b
            b = []
          end
        end
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
bio-blastxmlparser-2.0.0 lib/bio/db/blast/xmlsplitter.rb
bio-blastxmlparser-1.1.2 lib/bio/db/blast/xmlsplitter.rb
bio-blastxmlparser-1.1.1 lib/bio/db/blast/xmlsplitter.rb