lib/bio/db/gff/digest/gffnocache.rb in bio-gff3-0.8.5 vs lib/bio/db/gff/digest/gffnocache.rb in bio-gff3-0.8.6

- old
+ new

@@ -19,34 +19,39 @@
 
     # Module to fetch a line from GFF3 file and returns a parsed
     # record
     module SeekRec
       # Fetch a record using fh and file seek position
-      def SeekRec::fetch(fh,fpos)
+      def SeekRec::fetch(fh,fpos,parser)
         return nil if fh==nil or fpos==nil
         fh.seek(fpos)
-        GFF::GFF3::FileRecord.new(fpos, fh.gets)
+        if parser == :bioruby
+          GFF::GFF3::BioRubyFileRecord.new(fpos, fh.gets)
+        else
+          GFF::GFF3::FastParserFileRecord.new(fpos, fh.gets)
+        end
       end
     end
 
     # Helper class which gives Hash-like access to the
     # no-cache GFF3 file
     class SeekRecList
-      def initialize fh
+      def initialize fh, parser
         @fh = fh
         @h = {}
+        @parser = parser
       end
 
       def []= id, rec
         raise "id #{id} occurs twice!" if @h[id]
         fpos = rec.io_seek
         @h[id] = fpos
       end
 
       def [](id)
         fpos = @h[id]
-        SeekRec::fetch(@fh,fpos)
+        SeekRec::fetch(@fh,fpos,@parser)
       end
 
       def each
         @h.each do | id,fpos |
           yield id, self[id]
@@ -54,11 +59,11 @@
       end
     end
 
     # List of ids
     class SeekLinkedRecs < Hash
-      include Helpers::Error
+      include Helpers::Logger
       def add id, rec
         info "Adding #{rec.feature_type} <#{id}>"
         self[id] = [] if self[id] == nil
         self[id] << rec.io_seek
       end
@@ -89,18 +94,26 @@
       # rather than the records themselves
       def parse
         info "---- Digest DB and store data in mRNA Hash (NoCache)"
         @count_ids = Counter.new # Count ids
         @count_seqnames = Counter.new # Count seqnames
-        @componentlist = SeekRecList.new(@iter.fh) # Store containers, like genes, contigs
+        @componentlist = SeekRecList.new(@iter.fh,@options[:parser]) # Store containers, like genes, contigs
         @orflist = SeekLinkedRecs.new # Store linked gene records
         @mrnalist = SeekLinkedRecs.new # Store linked mRNA records
         @cdslist = SeekLinkedRecs.new
         @exonlist = SeekLinkedRecs.new
         @sequencelist = {}
         @unrecognized_features = {}
-        @iter.each_rec do | id, rec |
+        @iter.each_rec do |fpos, line|
+          rec = case @options[:parser]
+            when :bioruby
+              Bio::GFF::GFF3::BioRubyFileRecord.new(fpos, line)
+            when :line
+              Bio::GFF::GFF3::FastParserFileRecord.new(fpos, line)
+            else
+              raise 'Unknown parser'
+          end
           store_record(rec)
         end
         @iter.each_sequence do | id, bioseq |
           @sequencelist[id] = bioseq.to_s
         end
@@ -115,14 +128,20 @@
         # p list.class
         fh = @iter.fh
         list.each do | id, io_seeklist |
           recs = []
           io_seeklist.each do | fpos |
-            recs << SeekRec::fetch(fh,fpos)
+            recs << SeekRec::fetch(fh,fpos,@options[:parser])
           end
           seqid = recs[0].seqname
           component = find_component(recs[0])
-          yield id, recs, component
+          if @options[:no_assemble]
+            recs.each do | rec |
+              yield id, [rec], component
+            end
+          else
+            yield id, recs, component
+          end
         end
       end
     end
   end
 end