bin/gff3-fetch in bio-gff3-0.8.5 vs bin/gff3-fetch in bio-gff3-0.8.6

- old
+ new

@@ -23,20 +23,20 @@ mRNA : assemble mRNA CDS : assemble CDS exon : list all exons gene|ORF : list gene ORFs - other : use any type from GFF3 definition, e.g. 'Terminate' -- NYI + other : use any type from GFF3 definition, e.g. 'Terminate' and the following performance options: --parser bioruby : use BioRuby GFF3 parser (slow) - --parser line : use GFF3 line parser -- in preparation - --parser block : use GFF3 block parser (optimistic) -- NYI + --parser line : use GFF3 line parser (faster, default) + --block : parse GFF3 by block (optimistic) -- NYI --cache full : load all in RAM (fast, default) --cache none : do not load anything in memory (slow) - --cache lru : use LRU cache (limit RAM use, fast) -- NYI + --cache lru : use least recently used cache (limit RAM use, fast) -- NYI --max-cpus num : use num threads -- NYI --emboss : use EMBOSS translation (fast) -- NYI Where (NYI == Not Yet Implemented): @@ -72,26 +72,50 @@ Fine tuning output - show errors only gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR + Fine tuning output - show messages matching regex + + gff3-fetch mRNA test/data/gff/test.gff3 --trace '=msg =~ /component/' + Fine tuning output - write log messages to file.log gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR --logger file.log +For more information on output, see the bioruby-logger plugin. + == Performance time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 2> /dev/null > test.fa - Cache real user sys + Digesting parser: + + Cache real user sys version RAM ------------------------------------------------------------ full,bioruby 12m41 12m28 0m09 (0.8.0) full,line 12m13 12m06 0m07 (0.8.5) - none,bioruby 504m39 477m49 26m50 (0.8.0) - lru,bioruby ? 
- ---------------------------------------------------- + full,line,lazy 11m51 11m43 0m07 (0.8.6) 6,600M + none,bioruby 504m 477m 26m50 (0.8.0) + none,line 297m 267m 28m36 (0.8.5) + none,line,lazy 132m 106m 26m01 (0.8.6) 650M + + lru,bioruby 533m 510m 22m47 (0.8.5) + lru,line 353m 326m 26m44 (0.8.5) 1K + lru,line 305m 281m 22m30 (0.8.5) 10K + lru,line,lazy 182m 161m 21m10 (0.8.6) 10K + lru,line,lazy 75m 75m 0m17 (0.8.6) 50K 730M + ------------------------------------------------------------ + + Block parser: + + Cache real user sys gff3 version + ------------------------------------------------------------ + in preparation + ------------------------------------------------------------ + where 52M m_hapla.WS217.dna.fa 456M m_hapla.WS217.gff3 @@ -130,43 +154,47 @@ Bio::Log::CLI.trace('info') options = OpenStruct.new() # ---- Default options -options.parser = :bioruby +options.parser = :line opts = OptionParser.new() { |opts| opts.on_tail("-h", "--help", "Show help and examples") { print(opts) print USAGE exit() } - opts.on("--parser [bioruby,line]", String, "Parser (default bioruby)") do |p| + opts.on("--parser [bioruby,line]", String, "Parser (default line)") do |p| case p.downcase when 'bioruby' options.parser = :bioruby when 'line' options.parser = :line else raise 'Unknown --parser option' end end - opts.on("--cache [none,full]", String, "Caching (default full)") do |cache| + opts.on("--cache [full,lru,none]", String, "Caching (default full)") do |cache| case cache.downcase when 'none' options.cache = :cache_none - # when 'lru' - # options.cache = :cache_lru + when 'lru' + options.cache = :cache_lru when 'full' options.cache = :cache_full else raise 'Unknown --cache option' end end + opts.on("--block", "Parse by block") do |b| + options.block = true + end + opts.on("--no-assemble", "output sequences without assembling") do |b| options.no_assemble = true end opts.on("--phase", "adjust for phase (useful for CDS --no-assemble --translate)") do |b| @@ -227,19 +255,24 
@@ end opts = {} opts[:validate] = options.validate opts[:parser] = options.parser + opts[:block] = options.block opts[:cache_components] = options.cache opts[:cache_records] = options.cache opts[:fasta_filename] = fastafn if fastafn opts[:fix_wormbase] = options.fix_wormbase opts[:fix] = options.fix opts[:no_assemble] = options.no_assemble opts[:phase] = options.phase opts[:debug] = options.debug + include Bio::GFFbrowser::Helpers::Logger + debug $:.to_s + debug opts.to_s + log_sys_info("BaseLine") gff3 = Bio::GFFbrowser::GFF3.new(fn,opts) gff = gff3.assembler writer = Bio::GFFbrowser::FastaWriter.new(options.translate, options.validate) @@ -263,11 +296,14 @@ when 'cds' gff.each_CDS_seq do | id, seq | writer.put(id,seq) end else - raise "Unknown action on type <#{gfftype}>" + gff.each_seq(gfftype.downcase) do | id, seq | + writer.put(id,seq) + end end + log_sys_info("Done") fastafn = nil end