#! /usr/bin/ruby
#
# Author:: Pjotr Prins
# Copyright:: August 2010
# License:: Ruby License
#
# Copyright (C) 2010,2011 Pjotr Prins
#
# Command-line driver: reads one or more GFF3 files (each optionally preceded
# by a single fasta file) and writes the sequences of the requested feature
# type (gene, orf, mrna, exon or cds) through Bio::GFFbrowser::FastaWriter.

# NOTE(review): the opening of this usage text (everything up to
# "/dev/null > test.fa") is missing from the source as received; only the
# surviving tail is kept below. Restore the full text from the upstream file.
USAGE = <<EOM
 /dev/null > test.fa

    Cache           real     user     sys
    ------------------------------------------------------------
    full,bioruby    12m41    12m28    0m09    (0.8.0)
    full,line       12m13    12m06    0m07    (0.8.5)
    none,bioruby    504m39   477m49   26m50   (0.8.0)
    lru,bioruby     ?
    ----------------------------------------------------

  where

    52M m_hapla.WS217.dna.fa
    456M m_hapla.WS217.gff3

  ruby 1.9.2p136 (2010-12-25 revision 30365) [x86_64-linux]
  on an 8 CPU, 2.6 GHz (6MB cache), 16 GB RAM machine.

== Cite

  If you use this software, please cite

    http://dx.doi.org/10.1093/bioinformatics/btq475

== Copyright

Copyright (C) 2010,2011 Pjotr Prins

EOM

# Make the bundled lib/ directory loadable and read the release version.
rootpath = File.dirname(File.dirname(__FILE__))
$: << File.join(rootpath,'lib')

# File.read opens, reads and closes the file in one call (no leaked handle).
GFF3_VERSION = File.read(File.join(rootpath,'VERSION')).chomp

require 'bio-gff3'
require 'optparse'
require 'ostruct'
require 'bio/output/gfffastawriter'

$stderr.print "BioRuby GFF3 Plugin #{GFF3_VERSION} Copyright (C) 2010,2011 Pjotr Prins \n\n"

# Without any arguments show the usage text; processing then falls through
# and does nothing because there is no feature type and no file to read.
print USAGE if ARGV.empty?

Bio::Log::CLI.logger('stderr')
Bio::Log::CLI.trace('info')

options = OpenStruct.new

# ---- Default options
options.parser = :bioruby

# ---- Command line switches
option_parser = OptionParser.new do |o|
  o.on_tail("-h", "--help", "Show help and examples") do
    print(o)
    print USAGE
    exit
  end

  # The bracketed placeholder makes the argument optional for OptionParser,
  # so the block may receive nil; to_s routes that case to the explicit error
  # below instead of a NoMethodError on nil.
  o.on("--parser [bioruby,line]", String, "Parser (default bioruby)") do |name|
    case name.to_s.downcase
    when 'bioruby'
      options.parser = :bioruby
    when 'line'
      options.parser = :line
    else
      raise 'Unknown --parser option'
    end
  end

  # Same optional-argument handling as --parser.
  o.on("--cache [none,full]", String, "Caching (default full)") do |cache|
    case cache.to_s.downcase
    when 'none'
      options.cache = :cache_none
    # when 'lru'
    #   options.cache = :cache_lru
    when 'full'
      options.cache = :cache_full
    else
      raise 'Unknown --cache option'
    end
  end

  o.on("--no-assemble", "output sequences without assembling") do
    options.no_assemble = true
  end

  o.on("--phase", "adjust for phase (useful for CDS --no-assemble --translate)") do
    options.phase = true
  end

  o.on("--translate", "output as amino acid sequence") do |b|
    options.translate = b
  end

  o.on("--validate", "validate GFF3 file by translating") do |v|
    options.validate = v
  end

  o.on("--fix", "Fix frame errors in the GFF3 definition") do
    options.fix = true
  end

  o.on("--fix-wormbase", "Wormbase fix gene1 frame error") do
    options.fix_wormbase = true
  end

  o.on("--logger filename",String,"Log to file (default stderr)") do |name|
    Bio::Log::CLI.logger(name)
  end

  o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do |s|
    Bio::Log::CLI.trace(s)
  end

  o.on("-q", "--quiet", "Run quietly") do
    Bio::Log::CLI.trace('error')
  end

  o.on("-v", "--verbose", "Run verbosely") do
    Bio::Log::CLI.trace('info')
  end

  o.on("--debug", "Show debug messages") do
    Bio::Log::CLI.trace('debug')
    options.debug = true
  end
end

option_parser.parse!(ARGV)

Bio::Log::CLI.configure('bio-gff3')

# First positional argument is the feature type; the rest are file names.
gfftype = ARGV.shift

fastafn = nil

ARGV.each do |fn|
  # A fasta file applies to the GFF3 file that follows it on the command
  # line. The pattern is anchored so that only the exact extensions .fa,
  # .fas and .fasta qualify (an unanchored match would also accept any
  # extension that merely contains "fa", such as .gfa).
  if File.extname(fn) =~ /\A\.(?:fa|fas|fasta)\z/i
    raise "Only one fasta file allowed per GFF3" if fastafn
    fastafn = fn
    next
  end

  # Per-file settings handed to the GFF3 reader.
  gff_opts = {}
  gff_opts[:validate] = options.validate
  gff_opts[:parser] = options.parser
  gff_opts[:cache_components] = options.cache
  gff_opts[:cache_records] = options.cache
  gff_opts[:fasta_filename] = fastafn if fastafn
  gff_opts[:fix_wormbase] = options.fix_wormbase
  gff_opts[:fix] = options.fix
  gff_opts[:no_assemble] = options.no_assemble
  gff_opts[:phase] = options.phase
  gff_opts[:debug] = options.debug

  gff3 = Bio::GFFbrowser::GFF3.new(fn, gff_opts)
  gff = gff3.assembler
  writer = Bio::GFFbrowser::FastaWriter.new(options.translate, options.validate)

  case gfftype.downcase
  when 'gene', 'orf'
    # NOTE(review): 'orf' is routed to each_gene_seq exactly like 'gene' --
    # confirm this is intended.
    gff.each_gene_seq { |id, seq| writer.put(id, seq) }
  when 'mrna'
    gff.each_mRNA_seq { |id, seq| writer.put(id, seq) }
  when 'exon'
    gff.each_exon_seq { |id, seq| writer.put(id, seq) }
  when 'cds'
    gff.each_CDS_seq { |id, seq| writer.put(id, seq) }
  else
    raise "Unknown action on type <#{gfftype}>"
  end

  # The fasta file is consumed by the GFF3 file it preceded.
  fastafn = nil
end