#!/usr/bin/env ruby

#    SeqTrimNext: Next generation sequencing preprocessor
#    Copyright (C) <2011>
#    Authors: Almudena Bocinos Rioboo, Diego Dario Guerrero Fernandez,
#             Rocio Bautista Moreno, Juan Falgueras Cano & M. Gonzalo Claros
#    email: soporte@scbi.uma.es - http://www.scbi.uma.es
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Directory that contains this script (as reported by __FILE__).
ROOT_PATH = File.dirname(__FILE__)

# Append the development checkouts of both gems to the load path.
$LOAD_PATH.push(
  File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/'),
  File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib/')
)

require 'seqtrimnext'

############ PATHS #######################

# Base path of the installation; same value as ROOT_PATH.
$SEQTRIM_PATH = ROOT_PATH

# Init file location: use SEQTRIMNEXT_INIT when it is set and names an
# existing path, otherwise fall back to 'init_env' under $SEQTRIM_PATH.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
init_override = ENV['SEQTRIMNEXT_INIT']
if init_override && File.exist?(init_override)
  $SEQTRIMNEXT_INIT = File.expand_path(init_override)
else
  $SEQTRIMNEXT_INIT = File.join($SEQTRIM_PATH, 'init_env')
end

# Resolve the BLAST database directories.
# With BLASTDB set, its value is taken as the formatted-database path and its
# parent directory as the database root (existence is intentionally not
# checked here). Without it, both default to the DB directory under ROOT_PATH.
blastdb_env = ENV['BLASTDB']

if blastdb_env.nil?
  $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
  $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB", 'formatted'))
else
  $FORMATTED_DB_PATH = blastdb_env
  $DB_PATH = File.dirname(blastdb_env)
end

# Write the resolved formatted path back into the environment.
ENV['BLASTDB'] = $FORMATTED_DB_PATH

OUTPUT_PATH='output_files'

# Read the parameters from the console. ARGV (an array) is used for this.
# Every option gets its default stored in `options` before its switch is
# declared, so `options` is fully populated even when no flag is given.
options = {}

optparse = OptionParser.new do |opts|
  # Set a banner, displayed at the top of the help screen.
  opts.banner = "Usage: #{$0} -t template_file \{-Q fastaQ_file | -f fasta_file -q qual_file\} [options]"

  # Define the options, and what they do.
  options[:server_ip] = ''
  opts.on( '-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface' ) do |server_ip|
    # Pick the first local IPv4 address that starts with the given (possibly
    # partial) ip; an empty string is stored when nothing matches.
    ipv4_addresses = Socket.ip_address_list.select { |addr| addr.ipv4? }.map { |addr| addr.ip_address }
    options[:server_ip] = ipv4_addresses.find { |addr| addr.start_with?(server_ip) } || ''
  end

  options[:port] = 0 # previously 50000
  opts.on( '-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
    options[:port] = port.to_i
  end

  options[:workers] = 2
  opts.on( '-w', '--workers COUNT', 'Number of workers, or file containing machine names to launch workers with ssh' ) do |workers|
    if File.exist?(workers)
      # The argument names an existing file: use its lines as the workers list.
      options[:workers] = File.read(workers).split("\n").map { |line| line.chomp }
    else
      begin
        options[:workers] = Integer(workers)
      rescue ArgumentError, TypeError
        # Report the value that was rejected, not the default stored in options.
        $stderr.puts "ERROR:Invalid workers parameter #{workers}"
        exit
      end
    end
  end

  options[:only_workers] = false
  opts.on( '-o', '--only_workers', 'Only launch workers' ) do
    options[:only_workers] = true
  end

  options[:check_db] = false
  opts.on( '-c', '--check_databases', 'Check Blast databases and reformat if necessary' ) do
    options[:check_db] = true
  end

  # options[:skip_initial_stats] = false
  # opts.on( '-k', '--skip_initial_stats', 'Skip initial stats' ) do
  #   options[:skip_initial_stats] = true
  # end

  options[:install_db] = nil
  opts.on( '-i', '--install_databases TYPE', 'Install base databases and reformat them if necessary') do |db_type|
    options[:install_db] = db_type
  end

  options[:logfile] = STDOUT
  opts.on( '-l', '--logfile FILE', 'Write log to FILE' ) do |file|
    options[:logfile] = file
  end

  options[:fastq] = nil
  opts.on( '-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>' ) do |file|
    options[:fastq] = file
  end

  options[:fasta] = nil
  opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
    options[:fasta] = file
  end

  options[:qual] = nil
  opts.on( '-q', '--qual FILE', 'Qual input file' ) do |file|
    options[:qual] = file
  end

  options[:list_db] = nil
  options[:list_db_name] = 'ALL'
  opts.on( '-L', '--list_db [DB_NAME]', 'List entries IDs in DB_NAME. Use "-L all" to view all available databases' ) do |value|
    options[:list_db] = true
    # DB_NAME is optional; keep the 'ALL' default when it is omitted.
    options[:list_db_name] = value if value
  end

  options[:gen_params] = false
  opts.on( '-G', '--generate_template', 'Generates a sample template file with default parameters' ) do
    options[:gen_params] = true
  end

  options[:template] = nil
  opts.on( '-t', '--template TEMPLATE_FILE', 'Use TEMPLATE_FILE instead of default parameters' ) do |file|
    options[:template] = file
  end

  options[:chunk_size] = 100
  opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
    options[:chunk_size] = cs.to_i
  end

  options[:json] = nil
  opts.on( '-j', '--json', 'Save results in json file' ) do
    options[:json] = true
  end

  options[:skip_output] = false
  opts.on( '-K', '--no-verbose', 'Change to no verbose mode. Every sequence will not be written to output log' ) do
    options[:skip_output] = true
  end

  options[:skip_report] = false
  opts.on( '-R', '--no-report', 'Do not generate final PDF report (gem scbi_seqtrimnext_report required if you want to generate PDF report).' ) do
    options[:skip_report] = true
  end

  # This displays the help screen, all programs are
  # assumed to have this option.
  opts.on_tail( '-h', '--help', 'Display this screen' ) do
    puts opts
    show_additional_help
    exit
  end
end

# Parse the options and remove them from ARGV.
optparse.parse!
# Main flow of the script: handle the one-shot modes (list DB, generate
# template, install/check databases), validate inputs, run Seqtrim and
# optionally launch the report generator.
# File.exist? is used throughout because File.exists? was removed in Ruby 3.2.

if options[:list_db]
  # List database entries in a database
  ListDb.new($DB_PATH, options[:list_db_name])
  exit
end

if options[:gen_params]
  # Generates a sample params file in current directory
  Params.generate_sample_params
  exit
end

# set logger
# system('rm logs/*')
FileUtils.mkdir('logs') unless File.exist?('logs')

$LOG = Logger.new(options[:logfile])
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
# logger.level = Logger::INFO # DEBUG < INFO < WARN < ERROR < FATAL < UNKNOWN

$LOG.info("SeqTrimNext version #{Seqtrimnext::SEQTRIM_VERSION}")
$LOG.info("Using BLASTDB: "+ $FORMATTED_DB_PATH)
$LOG.info("Using options: "+ options.to_json)

if options[:install_db]
  # install databases
  InstallDatabase.new(options[:install_db], $DB_PATH)

  # reformat databases
  MakeBlastDb.new($DB_PATH)
  exit
end

unless File.exist?($FORMATTED_DB_PATH)
  STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
  exit
end

if options[:check_db]
  # check and format blast databases
  MakeBlastDb.new($DB_PATH)
  exit
end

# A template plus either a fastq or a fasta input is mandatory.
required_options = options[:template] && (options[:fastq] || options[:fasta])

# Any argument left in ARGV after option parsing is unexpected.
if (ARGV.count != 0) || !required_options
  puts "You must provide all required options"
  puts ""
  puts optparse.help
  exit
end

# check for template: first as given, then inside the bundled templates folder
unless File.exist?(options[:template])
  bundled_template = File.join($SEQTRIM_PATH, 'templates', options[:template])

  if File.exist?(bundled_template)
    options[:template] = bundled_template
  else
    $LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
    puts Dir.glob(File.join($SEQTRIM_PATH, 'templates', '*.txt')).map { |t| File.basename(t) }
    exit
  end
end

$LOG.info "Using init file: #{$SEQTRIMNEXT_INIT}"
$LOG.info "Using params file: #{options[:template]}"

# Input files (fastq, fasta, qual): every one that was given must exist.
# A fastq value of '-' is skipped because it selects <STDIN>.
# The error always names the file that was actually checked.
[:fastq, :fasta, :qual].each do |input_key|
  input_path = options[input_key]
  next if input_path.nil?
  next if input_key == :fastq && input_path == '-'
  next if File.exist?(input_path)

  $LOG.error "Input file: #{input_path} doesn't exists"
  exit
end

Seqtrim.new(options)

# generate report
if !options[:skip_report] && system("which generate_report.rb > /dev/null ")
  cmd = "generate_report.rb output_files 2> report_generation_errors.log"
  $LOG.info "Generating report #{cmd}"
  `#{cmd}`
else
  # Reached when -R was given or when generate_report.rb is not on the PATH.
  skip_text = options[:skip_report] ? ' and remove the -R option from the command line.' : '.'
  $LOG.info "If you want a detailed report in PDF format, be sure you have installed the optional seqtrimnext_report gem (gem install seqtrimnext_report)#{skip_text}"
end