lib/bio/db/gff/digest/gffnocache.rb in bio-gff3-0.8.5 vs lib/bio/db/gff/digest/gffnocache.rb in bio-gff3-0.8.6
- old
+ new
@@ -19,34 +19,39 @@
# Module that fetches a line from a GFF3 file and returns a parsed
# record
module SeekRec
# Fetch a record using fh and file seek position.
#
# Seeks fh to fpos, reads one line with fh.gets and wraps it in a file
# record built from (fpos, line). Returns nil when fh or fpos is nil.
# In the new version the parser argument selects the record class:
# :bioruby gives BioRubyFileRecord, any other value gives
# FastParserFileRecord.
# NOTE(review): the parser dispatch in parse raises 'Unknown parser' for
# values other than :bioruby and :line, while this method silently falls
# back to the fast parser -- confirm the difference is intended.
- def SeekRec::fetch(fh,fpos)
+ def SeekRec::fetch(fh,fpos,parser)
return nil if fh==nil or fpos==nil
fh.seek(fpos)
- GFF::GFF3::FileRecord.new(fpos, fh.gets)
+ if parser == :bioruby
+ GFF::GFF3::BioRubyFileRecord.new(fpos, fh.gets)
+ else
+ GFF::GFF3::FastParserFileRecord.new(fpos, fh.gets)
+ end
end
end
# Helper class which gives Hash-like access to the
# no-cache GFF3 file
class SeekRecList
# Keep the file handle and start with an empty id => seek position table.
# The new version also remembers the parser, which [] hands on to
# SeekRec::fetch.
- def initialize fh
+ def initialize fh, parser
@fh = fh
@h = {}
+ @parser = parser
end
# Remember where the record for id lives in the file. Only the seek
# position (rec.io_seek) is kept, not the record itself. Registering an
# id that already has a stored position raises a RuntimeError.
def []= id, rec
  if @h[id]
    raise "id #{id} occurs twice!"
  end
  @h.store(id, rec.io_seek)
end
# Look up the seek position stored for id and read the record back from
# the file through SeekRec::fetch. An id that was never stored has a nil
# position, for which SeekRec::fetch returns nil.
def [](id)
fpos = @h[id]
- SeekRec::fetch(@fh,fpos)
+ SeekRec::fetch(@fh,fpos,@parser)
end
def each
@h.each do | id,fpos |
yield id, self[id]
@@ -54,11 +59,11 @@
end
end
# List of ids
class SeekLinkedRecs < Hash
- include Helpers::Error
+ include Helpers::Logger
# Append the seek position of rec to the list kept under id, creating the
# list the first time an id is seen. The record itself is not stored.
def add id, rec
  info "Adding #{rec.feature_type} <#{id}>"
  self[id] = [] if self[id].nil?
  self[id].push(rec.io_seek)
end
@@ -89,18 +94,26 @@
# rather than the records themselves
def parse
info "---- Digest DB and store data in mRNA Hash (NoCache)"
@count_ids = Counter.new # Count ids
@count_seqnames = Counter.new # Count seqnames
- @componentlist = SeekRecList.new(@iter.fh) # Store containers, like genes, contigs
+ @componentlist = SeekRecList.new(@iter.fh,@options[:parser]) # Store containers, like genes, contigs
@orflist = SeekLinkedRecs.new # Store linked gene records
@mrnalist = SeekLinkedRecs.new # Store linked mRNA records
@cdslist = SeekLinkedRecs.new
@exonlist = SeekLinkedRecs.new
@sequencelist = {}
@unrecognized_features = {}
- @iter.each_rec do | id, rec |
+ @iter.each_rec do |fpos, line|
+ rec = case @options[:parser]
+ when :bioruby
+ Bio::GFF::GFF3::BioRubyFileRecord.new(fpos, line)
+ when :line
+ Bio::GFF::GFF3::FastParserFileRecord.new(fpos, line)
+ else
+ raise 'Unknown parser'
+ end
store_record(rec)
end
@iter.each_sequence do | id, bioseq |
@sequencelist[id] = bioseq.to_s
end
@@ -115,14 +128,20 @@
# p list.class
fh = @iter.fh
list.each do | id, io_seeklist |
recs = []
io_seeklist.each do | fpos |
- recs << SeekRec::fetch(fh,fpos)
+ recs << SeekRec::fetch(fh,fpos,@options[:parser])
end
seqid = recs[0].seqname
component = find_component(recs[0])
- yield id, recs, component
+ if @options[:no_assemble]
+ recs.each do | rec |
+ yield id, [rec], component
+ end
+ else
+ yield id, recs, component
+ end
end
end
end
end