#!/usr/bin/env ruby

# 12-2-2011 Noe Fernandez Pozo.
# Full-LengtherNEXT predicts if your sequences are complete, showing you the
# nucleotide sequences and the translated protein.
#
# This launcher script:
#   1. parses the command-line options,
#   2. checks that the required blast+ databases are present under BLASTDB,
#   3. starts a ScbiMapreduce manager that distributes the work to workers.

#------------------------------------------------------------------ parameters entry

require 'optparse'
require 'socket'
# Logger is instantiated below for $LOG; require it explicitly instead of
# relying on another gem having loaded it.
require 'logger'

options = {}

# File.exist? is used throughout: File.exists? was deprecated and removed in Ruby 3.2.
Dir.mkdir('logs') unless File.exist?('logs')

optparse = OptionParser.new do |opts|
  options[:fasta] = nil
  opts.on('-f', '--fasta FILE', 'Fasta input file') do |file|
    options[:fasta] = file
  end

  options[:tax_group] = nil
  opts.on('-g', '--taxon_group GROUP', "Taxon group, required to use the best databases:\n\t\t\t\t\tfungi\n\t\t\t\t\thuman\n\t\t\t\t\tinvertebrates\n\t\t\t\t\tmammals\n\t\t\t\t\tplants\n\t\t\t\t\trodents\n\t\t\t\t\tvertebrates\n\n") do |tax_name|
    options[:tax_group] = tax_name
  end

  options[:user_db] = nil
  opts.on('-u', '--user_db UserDB', 'User blast+ database') do |db|
    options[:user_db] = db
  end

  # Verbose mode is currently disabled:
  # options[:verbose] = nil
  # opts.on( '-v', '--verbose_mode', "verbose mode\n\n" ) do |verbose|
  #   options[:verbose] = verbose
  # end

  options[:evalue] = 1.0e-25
  opts.on('-e', '--evalue EVALUE', 'e value threshold to consider as reliable the orthologue sequence. Default=1.0e-25') do |evalue|
    options[:evalue] = evalue.to_f
  end

  options[:ident] = 45.00
  opts.on('-i', '--identity_percent IDENTITY', 'identity percent threshold to consider as reliable the sequence similarity. Default=45.00') do |ident|
    options[:ident] = ident.to_f
  end

  options[:distance] = 15
  opts.on('-m', '--max_distance maxDIST', "maximal distance between query and subject gene boundaries to be qualified as putative, the less distance the more strict. Default=15\n\n") do |distance|
    options[:distance] = distance.to_i
  end

  options[:workers] = 2
  opts.on('-w', '--workers INTEGER/FILE', 'Number of CPUs, or a file containing machine names to launch workers with ssh') do |workers|
    if File.exist?(workers)
      # Use the workers file: one machine name per line.
      options[:workers] = File.read(workers).split("\n").map { |w| w.chomp }
      # The first entry is discarded.
      # NOTE(review): presumably it names the machine running this manager -- confirm.
      options[:workers].shift
    elsif workers.to_i > 0
      options[:workers] = workers.to_i
    else
      # Neither an existing file nor a positive integer: fall back to the default.
      options[:workers] = 2
    end
  end

  options[:chunk_size] = 200
  opts.on('-c', '--chunk_size SIZE', "Number of sequences processed in each block when parallelization is used. Default=200") do |s|
    options[:chunk_size] = s.to_i
  end

  options[:server_ip] = '0.0.0.0'
  opts.on('-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface') do |server_ip|
    # Get the list of available IPv4 addresses and keep the first one that
    # starts with the (possibly partial) ip given by the user.
    ip_list = Socket.ip_address_list.select { |addr| addr.ipv4? }.map { |addr| addr.ip_address }
    ip = ip_list.find { |candidate| candidate.start_with?(server_ip) }

    # No interface matches: fall back to the default address.
    # $LOG.info("No available ip matching #{server_ip}")
    ip ||= '0.0.0.0'

    # $LOG.info("Using ip #{ip}")
    options[:server_ip] = ip
  end

  options[:port] = 0 # 50000
  opts.on('-p', '--port PORT', "Server port\n\n") do |port|
    options[:port] = port.to_i
  end

  # Set a banner, displayed at the top of the help screen.
  opts.banner = "Usage: full_lengther_next -f input.fasta -g [fungi|human|invertebrates|mammals|plants|rodents|vertebrates] [options]\n\n"

  # This displays the help screen
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end

# Parse options and remove them from ARGV.
optparse.parse!

# @verbose = options[:verbose]
# if (!@verbose.nil?)
#   puts "You have chosen the verbose mode:\n\nInput File:\t#{options[:fasta]}\nTaxon Group:\t#{options[:tax_group]}\nOwn Database:\t#{options[:user_db]}\nCPU Number:\t#{options[:workers]}"
# end

#----------------------------------------------------------------------- testing errors in parameters entry

# Both the fasta file and the taxon group are mandatory.
if options[:fasta].nil? || options[:tax_group].nil?
  puts "incorrect number of arguments, you need a fasta file and a taxonomical group:\n\n\t"
  puts optparse.help
  exit
end

#----------------------------------------------------------------------- loading classes and gems

ROOT_PATH = File.dirname(__FILE__)

# $: << File.expand_path(File.join(ROOT_PATH, "classes"))

# load gem path, only to test locally
# $: << File.expand_path('~/progs/ruby/gems/full_lengther_next/lib')

require 'full_lengther_next'

# Init file handed to the mapreduce manager: taken from the environment when it
# points to an existing file, otherwise 'init_env' next to this script.
if ENV['FULL_LENGTHER_NEXT_INIT'] && File.exist?(ENV['FULL_LENGTHER_NEXT_INIT'])
  FULL_LENGTHER_NEXT_INIT = File.expand_path(ENV['FULL_LENGTHER_NEXT_INIT'])
else
  FULL_LENGTHER_NEXT_INIT = File.join(ROOT_PATH, 'init_env')
end

# Database location: BLASTDB from the environment when it exists on disk,
# otherwise the 'blast_dbs' directory next to this script.
if ENV['BLASTDB'] && File.exist?(ENV['BLASTDB'])
  formatted_db_path = ENV['BLASTDB']
else
  formatted_db_path = File.expand_path(File.join(ROOT_PATH, "blast_dbs"))
end

# Write the chosen path back into the environment.
ENV['BLASTDB'] = formatted_db_path

puts "Using databases at: #{ENV['BLASTDB']}"

# The non-coding RNA database must be present.
ncrna_path = File.join(ENV['BLASTDB'], 'nc_rna_db', 'ncrna_fln_100.fasta.nhr')

unless File.exist?(ncrna_path)
  puts "DB File #{ncrna_path} doesn't exists"
  puts optparse.help
  exit
end

# The database of the requested taxon group must be present; a missing file
# may also mean that the taxon group name was mistyped.
sp_path = File.join(ENV['BLASTDB'], "sp_#{options[:tax_group]}", "sp_#{options[:tax_group]}.fasta.psq")

unless File.exist?(sp_path)
  puts "DB File #{sp_path} doesn't exists, or"
  puts "incorrect taxon group name: #{options[:tax_group]} choose:"
  puts optparse.help
  exit
end

require 'scbi_blast' # is a gem
require 'scbi_mapreduce'
# puts $:
require 'fl_string_utils'
require "une_los_hit"
require "lcs" # like the class "similar" of seqtrim, returns the longest common sequence
require "test_code"

########################################################## MAIN #################################################################

require 'my_worker_manager'

$LOG = Logger.new(STDOUT)
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

# Worker implementation file, located relative to the parent directory of this script.
custom_worker_file = File.join(File.dirname(ROOT_PATH), 'lib', 'full_lengther_next', 'classes', 'my_worker.rb')

$LOG.info 'Starting server'

# Initialize work manager (open files, etc)
MyWorkerManager.init_work_manager(options, options[:chunk_size])

# Create server
server = ScbiMapreduce::Manager.new(options[:server_ip], options[:port], options[:workers], MyWorkerManager, custom_worker_file, STDOUT, FULL_LENGTHER_NEXT_INIT)
server.chunk_size = options[:chunk_size]

# Launch server
server.start_server

$LOG.info 'Closing server'

puts "\nGracias por utilizar Full-LengtherNEXT"