#! /usr/bin/env ruby
#
# Author:: Pjotr Prins
# License:: Ruby License
#
# Command-line driver: for every GFF3 file on the command line it builds a
# Bio::GFFbrowser::GFF3 object, asks it for its assembler and writes the
# sequences of the requested feature type through
# Bio::GFFbrowser::FastaWriter. A FASTA file named *before* a GFF3 file is
# passed to that GFF3 file as :fasta_filename.

COPYRIGHT = "Copyright (C) 2010-2012 Pjotr Prins "

# NOTE(review): the beginning of this usage text (everything between the
# heredoc opener and "/dev/null > test.fa") is missing from the copy this was
# restored from, and the original column alignment of the tables was lost as
# well. The surviving text is kept; restore the head from upstream.
USAGE = <<EOM
 /dev/null > test.fa

  Digesting parser:

    Cache            real    user    sys     version  RAM
    ------------------------------------------------------------
    full,bioruby     12m41   12m28   0m09    (0.8.0)
    full,line        12m13   12m06   0m07    (0.8.5)
    full,line,lazy   11m51   11m43   0m07    (0.8.6)  6,600M
    none,bioruby     504m    477m    26m50   (0.8.0)
    none,line        297m    267m    28m36   (0.8.5)
    none,line,lazy   132m    106m    26m01   (0.8.6)  650M
    lru,bioruby      533m    510m    22m47   (0.8.5)
    lru,line         353m    326m    26m44   (0.8.5)  1K
    lru,line         305m    281m    22m30   (0.8.5)  10K
    lru,line,lazy    182m    161m    21m10   (0.8.6)  10K
    lru,line,lazy    75m     75m     0m17    (0.8.6)  50K  730M
    ------------------------------------------------------------

  Block parser:

    Cache            real    user    sys     gff3 version
    ------------------------------------------------------------
    in preparation
    ------------------------------------------------------------

  where

     52M m_hapla.WS217.dna.fa
    456M m_hapla.WS217.gff3

    ruby 1.9.2p136 (2010-12-25 revision 30365) [x86_64-linux]
    on an 8 CPU, 2.6 GHz (6MB cache), 16 GB RAM machine.

== Cite

  If you use this software, please cite

    http://dx.doi.org/10.1093/bioinformatics/bts080

  or

    http://dx.doi.org/10.1093/bioinformatics/btq475

== Copyright

#{COPYRIGHT}

EOM

# Make the gem's own lib directory loadable when run from a source checkout.
rootpath = File.dirname(File.dirname(__FILE__))
$: << File.join(rootpath, 'lib')

# File.read closes the handle; File.new(...).read left it open.
GFF3_VERSION = File.read(File.join(rootpath, 'VERSION')).chomp

require 'bio-gff3'
require 'optparse'
require 'ostruct'
require 'bio/output/gfffastawriter'

# Accepted values for --parser and --cache, mapped to the symbols that are
# handed on to Bio::GFFbrowser::GFF3.
PARSERS = { 'bioruby' => :bioruby, 'line' => :line }.freeze
CACHES  = {
  'none' => :cache_none,
  'lru'  => :cache_lru,
  'full' => :cache_full
}.freeze

# A command-line argument is treated as the FASTA input when its extension is
# exactly .fa, .fas or .fasta (case-insensitive). The previous pattern,
# /fa|fas|fasta$/, was unanchored and matched any extension containing "fa".
FASTA_EXTENSION = /\A\.(?:fa|fas|fasta)\z/i

$stderr.print "gff3-fetch (biogem Ruby #{RUBY_VERSION}) #{GFF3_VERSION} #{COPYRIGHT}\n\n"

print USAGE if ARGV.empty?

Bio::Log::CLI.logger('stderr')
Bio::Log::CLI.trace('info')

options = OpenStruct.new
# ---- Default options
options.parser = :line

option_parser = OptionParser.new do |opts|
  opts.on_tail("-h", "--help", "Show help and examples") do
    print(opts)
    print USAGE
    exit
  end

  # The bracketed placeholder makes the argument optional, so the block may
  # receive nil; to_s routes that case to the "Unknown" error instead of a
  # NoMethodError on nil.
  opts.on("--parser [bioruby,line]", String, "Parser (default line)") do |name|
    options.parser = PARSERS.fetch(name.to_s.downcase) do
      raise 'Unknown --parser option'
    end
  end

  opts.on("--cache [full,lru,none]", String, "Caching (default full)") do |name|
    options.cache = CACHES.fetch(name.to_s.downcase) do
      raise 'Unknown --cache option'
    end
  end

  opts.on("--block", "Parse by block") do
    options.block = true
  end

  opts.on("--no-assemble", "output sequences without assembling") do
    options.no_assemble = true
  end

  opts.on("--phase", "adjust for phase (useful for CDS --no-assemble --translate)") do
    options.phase = true
  end

  opts.on("--translate", "output as amino acid sequence") do |flag|
    options.translate = flag
  end

  opts.on("--validate", "validate GFF3 file by translating") do |flag|
    options.validate = flag
  end

  opts.on("--fix", "Fix frame errors in the GFF3 definition") do
    options.fix = true
  end

  opts.on("--fix-wormbase", "Wormbase fix gene1 frame error") do
    options.fix_wormbase = true
  end

  opts.on("--logger filename", String, "Log to file (default stderr)") do |name|
    Bio::Log::CLI.logger(name)
  end

  opts.on("--trace options", String, "Set log level (default INFO, see bio-logger)") do |level|
    Bio::Log::CLI.trace(level)
  end

  opts.on("-q", "--quiet", "Run quietly") do
    Bio::Log::CLI.trace('error')
  end

  opts.on("-v", "--verbose", "Run verbosely") do
    Bio::Log::CLI.trace('info')
  end

  opts.on("--debug", "Show debug messages") do
    Bio::Log::CLI.trace('debug')
    options.debug = true
  end
end

# parse! strips the recognised switches from ARGV, leaving
#   type [fasta] gff3 [[fasta] gff3 ...]
option_parser.parse!(ARGV)

Bio::Log::CLI.configure('bio-gff3')

# Provides debug and log_sys_info used below; mixed in once rather than on
# every loop iteration.
include Bio::GFFbrowser::Helpers::Logger

gfftype = ARGV.shift
fastafn = nil

ARGV.each do |fn|
  # A FASTA file is only remembered here; it is consumed by the next GFF3
  # file on the command line.
  if File.extname(fn) =~ FASTA_EXTENSION
    raise "Only one fasta file allowed per GFF3" if fastafn
    fastafn = fn
    next
  end

  # Key order is kept as before because the hash is written to the debug log.
  gff3_opts = {
    :validate         => options.validate,
    :parser           => options.parser,
    :block            => options.block,
    :cache_components => options.cache,
    :cache_records    => options.cache
  }
  gff3_opts[:fasta_filename] = fastafn if fastafn
  gff3_opts[:fix_wormbase]   = options.fix_wormbase
  gff3_opts[:fix]            = options.fix
  gff3_opts[:no_assemble]    = options.no_assemble
  gff3_opts[:phase]          = options.phase
  gff3_opts[:debug]          = options.debug

  debug $:.to_s
  debug gff3_opts.to_s
  log_sys_info("BaseLine")

  gff3   = Bio::GFFbrowser::GFF3.new(fn, gff3_opts)
  gff    = gff3.assembler
  writer = Bio::GFFbrowser::FastaWriter.new(options.translate, options.validate)
  # A plain proc passed with & behaves exactly like the literal block it
  # replaces.
  emit   = proc { |id, seq| writer.put(id, seq) }

  type = gfftype.downcase
  case type
  when 'gene', 'orf' then gff.each_gene_seq(&emit)
  when 'mrna'        then gff.each_mRNA_seq(&emit)
  when 'exon'        then gff.each_exon_seq(&emit)
  when 'cds'         then gff.each_CDS_seq(&emit)
  else                    gff.each_seq(type, &emit)
  end

  log_sys_info("Done")
  # The FASTA file applies to a single GFF3 file only.
  fastafn = nil
end