#!/usr/bin/env ruby
# Command-line launcher for GeneValidator.
#
# Collects the command-line switches into the `opt` hash, checks that exactly
# one positional argument (the input fasta file) was supplied, then hands the
# hash to GeneValidator::Validation and reports the total running time.
require 'optparse'
require 'genevalidator'
require 'genevalidator/version'

# Options hash handed to GeneValidator::Validation.new once parsing is done.
opt = {}

opt_parser = OptionParser.new do |opts|
  # NOTE(review): the banner text and the opening of the '--validations'
  # switch below were lost from the copy of this file under review; they are
  # reconstructed here. Confirm the wording against the upstream script.
  opts.banner = <<BANNER

USAGE:
    $ genevalidator [OPTIONS] Input_File

ARGUMENTS:
    Input_File: Path to the input fasta file containing the predicted sequences.

OPTIONAL ARGUMENTS
BANNER

  # NOTE(review): default assumed from the "(default)" marker in the help text
  # below -- confirm that Validation expects ['all'] when nothing is passed.
  opt[:validations] = ['all']
  opts.on('-v', '--validations <String>', Array,
          'The Validations to be applied.',
          'Validation Options Available (separated by coma):',
          '  all   = All validations (default),',
          '  lenc  = Length validation by clusterization,',
          '  lenr  = Length validation by ranking,',
          '  merge = Analyse gene merge,',
          '  dup   = Check for duplications,',
          '  frame = Open reading frame (ORF) validation,',
          '  orf   = Main ORF validation,',
          '  align = Validating based on multiple alignment') do |lst|
    opt[:validations] = lst
  end

  opt[:db] = 'swissprot -remote'
  opts.on('-d', '--db [BLAST_DATABASE]',
          'Path to the BLAST database',
          'GeneValidator also supports remote databases:',
          'e.g.   $ genevalidator -d "swissprot -remote" Input_File') do |db|
    opt[:db] = db
  end

  opts.on('-x', '--blast_xml_file [FILE]',
          'Provide GeneValidator with a pre-computed BLAST XML output',
          'file (BLAST -outfmt option 5).') do |blast_xml_file|
    opt[:blast_xml_file] = blast_xml_file
  end

  opts.on('-t', '--blast_tabular_file [FILE]',
          'Provide GeneValidator with a pre-computed BLAST tabular output',
          'file. (BLAST -outfmt option 6).') do |blast_tabular_file|
    opt[:blast_tabular_file] = blast_tabular_file
  end

  # No default for the tabular columns is set in this script.
  opts.on('-o', '--blast_tabular_options [Options]',
          'Custom format used in BLAST -outfmt argument',
          'See BLAST+ manual pages for more details') do |blast_table_options|
    opt[:blast_tabular_options] = blast_table_options
  end

  # The value supplied on the command line is stored exactly as OptionParser
  # yields it (a String); only the default below is an Integer.
  opt[:num_threads] = 1
  opts.on('-n', '--num_threads num_of_threads',
          'Specify the number of processor threads to use when running',
          'BLAST and Mafft within GeneValidator.') do |num_threads|
    opt[:num_threads] = num_threads
  end

  opt[:fast] = false
  opts.on('-f', '--fast',
          'Run BLAST on all sequences together (rather than separately)',
          'to speed up the analysis.',
          'However, this means that there will be a longer wait before the',
          'results can be viewed (as GeneValidator will need to run BLAST',
          'on all sequences before producing any results).',
          'The speed difference will be more apparent on larger input files') do
    opt[:fast] = true
  end

  opts.on('-r', '--raw_sequences [raw_seq]',
          'Supply a fasta file of the raw sequences of all BLAST hits present',
          'in the supplied BLAST XML or BLAST tabular file.') do |raw_seq|
    opt[:raw_sequences] = raw_seq
  end

  opts.on('-m', '--mafft_bin [MAFFT_PATH]',
          'Path to MAFFT bin folder (is added to $PATH variable)') do |mafft|
    opt[:mafft_bin] = mafft
  end

  opts.on('-b', '--blast_bin [BLAST_PATH]',
          'Path to BLAST+ bin folder (is added to $PATH variable)') do |blast|
    opt[:blast_bin] = blast
  end

  opts.on('--version',
          'The version of GeneValidator that you are running.') do
    puts GeneValidator::VERSION
    exit
  end

  opts.on('-h', '--help', 'Show this screen.') do
    puts opts
    exit
  end
end

# parse! removes every recognised switch from ARGV, leaving only positional
# arguments behind. The exception is bound to a local so its message can be
# reported; the previous code read $ERROR_INFO, which is nil unless the
# 'English' library has been required, and ended the message with a
# single-quoted '\n' that was printed literally.
begin
  opt_parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  $stderr.puts "Error: #{e.message}"
  exit 1
end

# Exactly one positional argument is accepted: the input fasta file.
if ARGV.length > 1
  $stderr.puts 'Error: you must specify a single fasta input file instead' \
               " of #{ARGV.length}.\n"
  exit 1
elsif ARGV.empty?
  puts opt_parser
  exit 1
end

start = Time.now
opt[:input_fasta_file] = ARGV[0]

GeneValidator::Validation.new(opt).run

puts "Total running time: #{(Time.now - start).round(3)}s"
puts # a blank line