bin/gff3-fetch in bio-gff3-0.8.5 vs bin/gff3-fetch in bio-gff3-0.8.6
- old
+ new
@@ -23,20 +23,20 @@
mRNA : assemble mRNA
CDS : assemble CDS
exon : list all exons
gene|ORF : list gene ORFs
- other : use any type from GFF3 definition, e.g. 'Terminate' -- NYI
+ other : use any type from GFF3 definition, e.g. 'Terminate'
and the following performance options:
--parser bioruby : use BioRuby GFF3 parser (slow)
- --parser line : use GFF3 line parser -- in preparation
- --parser block : use GFF3 block parser (optimistic) -- NYI
+ --parser line : use GFF3 line parser (faster, default)
+ --block : parse GFF3 by block (optimistic) -- NYI
--cache full : load all in RAM (fast, default)
--cache none : do not load anything in memory (slow)
- --cache lru : use LRU cache (limit RAM use, fast) -- NYI
+ --cache lru : use least recently used cache (limit RAM use, fast) -- NYI
--max-cpus num : use num threads -- NYI
--emboss : use EMBOSS translation (fast) -- NYI
Where (NYI == Not Yet Implemented):
@@ -72,26 +72,50 @@
Fine tuning output - show errors only
gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR
+ Fine tuning output - show messages matching regex
+
+ gff3-fetch mRNA test/data/gff/test.gff3 --trace '=msg =~ /component/'
+
Fine tuning output - write log messages to file.log
gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR --logger file.log
+For more information on output, see the bioruby-logger plugin.
+
== Performance
time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 2> /dev/null > test.fa
- Cache real user sys
+ Digesting parser:
+
+ Cache real user sys version RAM
------------------------------------------------------------
full,bioruby 12m41 12m28 0m09 (0.8.0)
full,line 12m13 12m06 0m07 (0.8.5)
- none,bioruby 504m39 477m49 26m50 (0.8.0)
- lru,bioruby ?
- ----------------------------------------------------
+ full,line,lazy 11m51 11m43 0m07 (0.8.6) 6,600M
+ none,bioruby 504m 477m 26m50 (0.8.0)
+ none,line 297m 267m 28m36 (0.8.5)
+ none,line,lazy 132m 106m 26m01 (0.8.6) 650M
+
+ lru,bioruby 533m 510m 22m47 (0.8.5)
+ lru,line 353m 326m 26m44 (0.8.5) 1K
+ lru,line 305m 281m 22m30 (0.8.5) 10K
+ lru,line,lazy 182m 161m 21m10 (0.8.6) 10K
+ lru,line,lazy 75m 75m 0m17 (0.8.6) 50K 730M
+ ------------------------------------------------------------
+
+ Block parser:
+
+ Cache real user sys gff3 version
+ ------------------------------------------------------------
+ in preparation
+ ------------------------------------------------------------
+
where
52M m_hapla.WS217.dna.fa
456M m_hapla.WS217.gff3
@@ -130,43 +154,47 @@
Bio::Log::CLI.trace('info')
options = OpenStruct.new()
# ---- Default options
-options.parser = :bioruby
+options.parser = :line
opts = OptionParser.new() { |opts|
opts.on_tail("-h", "--help", "Show help and examples") {
print(opts)
print USAGE
exit()
}
- opts.on("--parser [bioruby,line]", String, "Parser (default bioruby)") do |p|
+ opts.on("--parser [bioruby,line]", String, "Parser (default line)") do |p|
case p.downcase
when 'bioruby'
options.parser = :bioruby
when 'line'
options.parser = :line
else
raise 'Unknown --parser option'
end
end
- opts.on("--cache [none,full]", String, "Caching (default full)") do |cache|
+ opts.on("--cache [full,lru,none]", String, "Caching (default full)") do |cache|
case cache.downcase
when 'none'
options.cache = :cache_none
- # when 'lru'
- # options.cache = :cache_lru
+ when 'lru'
+ options.cache = :cache_lru
when 'full'
options.cache = :cache_full
else
raise 'Unknown --cache option'
end
end
+ opts.on("--block", "Parse by block") do |b|
+ options.block = true
+ end
+
opts.on("--no-assemble", "output sequences without assembling") do |b|
options.no_assemble = true
end
opts.on("--phase", "adjust for phase (useful for CDS --no-assemble --translate)") do |b|
@@ -227,19 +255,24 @@
end
opts = {}
opts[:validate] = options.validate
opts[:parser] = options.parser
+ opts[:block] = options.block
opts[:cache_components] = options.cache
opts[:cache_records] = options.cache
opts[:fasta_filename] = fastafn if fastafn
opts[:fix_wormbase] = options.fix_wormbase
opts[:fix] = options.fix
opts[:no_assemble] = options.no_assemble
opts[:phase] = options.phase
opts[:debug] = options.debug
+ include Bio::GFFbrowser::Helpers::Logger
+ debug $:.to_s
+ debug opts.to_s
+ log_sys_info("BaseLine")
gff3 = Bio::GFFbrowser::GFF3.new(fn,opts)
gff = gff3.assembler
writer = Bio::GFFbrowser::FastaWriter.new(options.translate, options.validate)
@@ -263,11 +296,14 @@
when 'cds'
gff.each_CDS_seq do | id, seq |
writer.put(id,seq)
end
else
- raise "Unknown action on type <#{gfftype}>"
+ gff.each_seq(gfftype.downcase) do | id, seq |
+ writer.put(id,seq)
+ end
end
+ log_sys_info("Done")
fastafn = nil
end