#! /usr/bin/ruby
#
# Author:: Pjotr Prins
# Copyright:: August 2010
# License:: Ruby License
#
# Copyright (C) 2010,2011 Pjotr Prins
#
# Command-line driver that reads one or more GFF3 files and writes the
# mRNA, CDS or exon sequences they describe through
# Bio::GFFbrowser::FastaWriter.
#
# Positional arguments: first the sequence type (mrna, cds or exon), then the
# input files. A file with a FASTA extension (.fa, .fas, .fasta) is attached
# to the *next* GFF3 file on the command line; every other file is treated as
# GFF3 input.

# Help text printed by --help and when the script is started without arguments.
#
# NOTE(review): the opening of this text (everything before "> test.fa") was
# lost in the copy of the file this revision was made from. The synopsis line
# below is derived from the argument handling in this script; restore the
# original description and examples from upstream if available.
USAGE = <<EOM

  Usage: #{File.basename($0)} [options] mrna|cds|exon [sequences.fa] annotations.gff3 > test.fa

    Cache     real      user      sys
    ----------------------------------------------------
    full      12m41s    12m28s    0m09s  (0.8.0 Jan. 2011)
    none     504m39s   477m49s   26m50s  (0.8.0 Jan. 2011)
    ----------------------------------------------------

  where

     52M m_hapla.WS217.dna.fa
    456M m_hapla.WS217.gff3

  ruby 1.9.2p136 (2010-12-25 revision 30365) [x86_64-linux]

  on an 8 CPU, 2.6 GHz (6MB cache), 16 GB RAM machine.

== Cite

  If you use this software, please cite http://dx.doi.org/10.1093/bioinformatics/btq475

== Copyright

  Copyright (C) 2010,2011 Pjotr Prins

EOM

SEP = File::SEPARATOR
rootpath = File.dirname(File.dirname(__FILE__)) + SEP
$: << rootpath + 'lib'

# File.read closes the handle; File.new(...).read left it open until GC.
GFF3_VERSION = File.read(rootpath + 'VERSION').chomp

require 'bio-gff3'
require 'optparse'
require 'ostruct'
require 'bio/output/gfffastawriter'

# Extensions recognised as FASTA input. Anchored on both ends: the previous
# pattern /fa|fas|fasta$/ matched any extension merely containing "fa".
FASTA_EXTENSION = /\A\.(fa|fas|fasta)\z/i

# Maps the (lower-cased) sequence type argument to the assembler iterator
# that yields |id, seq| pairs for that type.
SEQ_ITERATORS = {
  'mrna' => :each_mRNA_seq,
  'exon' => :each_exon_seq,
  'cds'  => :each_CDS_seq
}.freeze

$stderr.print "BioRuby GFF3 Plugin #{GFF3_VERSION} Copyright (C) 2010,2011 Pjotr Prins \n\n"

# Without arguments there is nothing to do: show the help and stop, instead of
# falling through to a confusing "Unknown GFF type ''" exception.
if ARGV.empty?
  print USAGE
  exit 1
end

options = OpenStruct.new
parser = OptionParser.new do |o|
  o.on_tail("-h", "--help", "Show help and examples") do
    print(o)
    print USAGE
    exit
  end

  o.on("--cache [none,full]", String, "Caching (default full)") do |cache|
    # The bracketed argument is optional, so a bare --cache passes nil here.
    case cache.to_s.downcase
    when 'none'
      options.cache = :cache_none
    # when 'lru'
    #   options.cache = :cache_lru
    when 'full'
      options.cache = :cache_full
    when ''
      # Bare --cache: leave the setting untouched, same as omitting the flag.
    else
      raise 'Unknown --cache option'
    end
  end

  o.on("--translate", "output as amino acid sequence") do |b|
    options.translate = b
  end

  o.on("--validate", "validate GFF3 file by translating") do |v|
    options.validate = v
    $stop_on_error = true # replace global in near future
  end

  o.on("--fix", "Fix frame errors in the GFF3 definition") do
    options.fix = true
  end

  o.on("--fix-wormbase", "Wormbase fix gene1 frame error") do
    options.fix_wormbase = true
  end

  # o.on("-q", "--quiet", "Run quietly") do |q|
  #   options.quiet = q
  # end

  # o.on("-v", "--[no-]verbose", "Run verbosely") do |v|
  #   options.verbose = v
  # end

  # o.on("-t", "--[no-]trace", "Debug") do |t|
  #   options.trace = t
  # end
end
parser.parse!(ARGV)

# First positional argument selects which sequences to emit.
gfftype = ARGV.shift
seq_iterator = SEQ_ITERATORS[gfftype.to_s.downcase]
raise "Unknown GFF type '#{gfftype}'" unless seq_iterator

# FASTA file waiting to be paired with the next GFF3 file, if any.
fastafn = nil
ARGV.each do |fn|
  if File.extname(fn) =~ FASTA_EXTENSION
    raise "Only one fasta file allowed per GFF3" if fastafn
    fastafn = fn
    next
  end

  db_opts = {}
  db_opts[:validate]         = options.validate
  db_opts[:cache_components] = options.cache
  db_opts[:cache_records]    = options.cache
  db_opts[:fasta_filename]   = fastafn if fastafn
  db_opts[:fix_wormbase]     = options.fix_wormbase
  db_opts[:fix]              = options.fix

  gffdb  = Bio::GFFbrowser::GFFdb.new(fn, db_opts)
  gff    = gffdb.assembler
  writer = Bio::GFFbrowser::FastaWriter.new(options.translate, options.validate)

  gff.public_send(seq_iterator) { |id, seq| writer.put(id, seq) }

  # The FASTA file applies to a single GFF3 file only.
  fastafn = nil
end

# A FASTA file must precede the GFF3 file it belongs to; one left over at the
# end was never used, so say so rather than dropping it silently.
if fastafn
  $stderr.print "Warning: FASTA file '#{fastafn}' is not followed by a GFF3 file and was ignored\n"
end