#!/usr/bin/env ruby

# 12-2-2011 Noe Fernandez Pozo.
# Full-LengtherNEXT predicts if your sequences are complete, showing you the
# nucleotide sequences and the translated protein.
#
# This launcher parses the command line, checks that the needed BLAST database
# files exist, starts a ScbiMapreduce manager that hands chunks of sequences to
# workers and, when requested with -r, calls `reptrans` with the annotations
# collected by the work manager.

ROOT_PATH = File.dirname(__FILE__)
$: << File.expand_path(File.join(ROOT_PATH, "../lib/full_lengther_next/classes/"))

require 'optparse'
require 'socket'
require 'logger' # Logger is instantiated in the MAIN section below

###############################################################################################
# PARSE OPTIONS
###############################################################################################

options = {}

# The 'logs' directory is created in the current working directory before any
# option is parsed.
Dir.mkdir('logs') unless File.exist?('logs')

optparse = OptionParser.new do |opts|
  # Each default is assigned immediately before its switch is declared, so
  # `options` is fully populated even when no switch is given.

  options[:acess_db] = 'stnp'
  opts.on( '-a', '--acess_db STRING', 'Select that databases is going to be used. s for Swissprot, t for trEMBL and n for ncrna, p for use Transdecoder and c for use testcode algothrim. By default is set to stnp' ) do |acess_db|
    options[:acess_db] = acess_db
  end

  options[:blast] = ''
  opts.on( '-b', '--blast STRING', 'Aditional options to blast execution' ) do |blast|
    options[:blast] = blast
  end

  options[:chunk_size] = 200
  opts.on( '-c', '--chunk_size SIZE', "Number of sequences processed in each block when parallelization is used. \nDefault=200" ) do |s|
    options[:chunk_size] = s.to_i
  end

  options[:est_db] = nil
  opts.on( '-d', '--est_db FILE', "EST database for representative transcriptome\n" ) do |est_db|
    options[:est_db] = est_db
    unless File.exist?(options[:est_db])
      puts "No valid path to EST database"
      Process.exit(-1)
    end
  end

  # Exonerate analysis is enabled by default; giving -e switches it off.
  options[:exonerate] = true
  opts.on( '-e', '--exonerate', 'Disables exonerate analysis' ) do
    options[:exonerate] = false
  end

  options[:fasta] = nil
  opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
    options[:fasta] = file
  end

  options[:tax_group] = nil
  opts.on( '-g', '--taxon_group GROUP', "Taxon group, required to use the best databases:\n#{"\t"*5}fungi\n#{"\t"*5}human\n#{"\t"*5}invertebrates\n#{"\t"*5}mammals\n#{"\t"*5}plants\n#{"\t"*5}rodents\n#{"\t"*5}vertebrates\n\n" ) do |tax_name|
    options[:tax_group] = tax_name
  end

  options[:ident] = 45.00
  opts.on( '-i', '--identity_percent IDENTITY', 'identity percent threshold to consider as reliable the sequence similarity. Default=45.00' ) do |ident|
    options[:ident] = ident.to_f
  end

  options[:high_clustering] = false
  opts.on( '-k', '--high_clustering', 'Only for representative transcriptome. Add a clustering step using pfam ids. Default false' ) do
    options[:high_clustering] = true
  end

  # The default is stored as a fraction (0.25) while the command-line value is
  # given as a percentage and divided by 100.
  options[:subject_coverage] = 0.25
  opts.on( '-j', '--subject_coverage_percent FLOAT', "Subject coverage percentage threshold" ) do |j|
    options[:subject_coverage] = j.to_f/100
  end

  options[:min_nucleotides] = 100
  opts.on( '-n', '--min_nucleotides minLONG', "min nucleotides to consider a part of chimera like putative unigene. Default=100\n\n" ) do |min_nucleotides|
    options[:min_nucleotides] = min_nucleotides.to_i
  end

  options[:distance] = 15
  opts.on( '-m', '--max_distance maxDIST', "maximal distance between query and subject gene boundaries to be qualified as putative, the less distance the more strict. \nDefault=15\n\n" ) do |distance|
    options[:distance] = distance.to_i
  end

  # NOTE(review): an earlier default of 50000 was replaced by 0; presumably 0
  # lets the server pick a free port -- confirm against ScbiMapreduce.
  options[:port] = 0
  opts.on( '-p', '--port PORT', "Server port\n\n" ) do |port|
    options[:port] = port.to_i
  end

  # NOTE(review): the help text announces "rcs" as the default while the code
  # default is 'rc' -- confirm which one is intended.
  options[:chimera] = 'rc'
  opts.on( '-q', '--chimera_detection STRING', "d for deactivate chimera detection mode, s for search chimeras only, r for revise it and c for cut it. Default = rcs \n\n" ) do |chimera|
    chimera.downcase!
    options[:chimera] = chimera
  end

  options[:reptrans] = nil
  opts.on( '-r', '--representative_transcriptome', "Generates a fasta file with the minime transcriptome\n" ) do |reptrans|
    options[:reptrans] = reptrans
  end

  options[:server_ip] = '0.0.0.0'
  opts.on( '-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface' ) do |server_ip|
    # Pick the first local IPv4 address that begins with the given (possibly
    # partial) ip; fall back to 0.0.0.0 when none matches.
    ip_list = Socket.ip_address_list.select { |addr| addr.ipv4? }.map { |addr| addr.ip_address }
    matching_ip = ip_list.find { |candidate| candidate.start_with?(server_ip) }
    options[:server_ip] = matching_ip || '0.0.0.0'
  end

  options[:ident_thresold] = 55.0
  opts.on( '-t', '--identity_thresold FLOAT', "For chimeras only. Min identity to consider that two proteins are the same. \nDefault=55.0\n\n" ) do |ident_thresold|
    # The switch is declared as FLOAT and the default is 55.0, so keep decimals.
    options[:ident_thresold] = ident_thresold.to_f
  end

  options[:user_db] = nil
  opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
    options[:user_db] = db
    # The database is considered present when its .psq file exists.
    unless File.exist?(File.expand_path(options[:user_db])+'.psq')
      puts "user database: #{options[:user_db]} was not found"
      exit
    end
  end

  options[:verbose] = 0
  opts.on( '-v', '--verbose INTEGER', 'Show extra info' ) do |verbose|
    options[:verbose] = verbose.to_i
  end

  options[:workers] = 2
  opts.on( '-w', '--workers INTEGER/FILE', 'Number of CPUs, or a file containing machine names to launch workers with ssh' ) do |workers|
    if File.exist?(workers)
      # use workers file
      options[:workers] = File.read(workers).split("\n").map{|w| w.chomp}
      # NOTE(review): the first line of the file is discarded; presumably it
      # names the machine running this server -- confirm.
      options[:workers].shift
    elsif (workers.to_i > 0)
      options[:workers] = workers.to_i
    else
      options[:workers] = 2
    end
  end

  options[:training_ident] = 45.00
  opts.on( '-x', '--training_identity_percent IDENTITY', 'identity percent threshold to use a complete sure sequence for Transdecoder training. Default=45.00' ) do |training_ident|
    # Use this block's own argument (the previous code referenced an undefined
    # local named `ident`, which raised NameError whenever -x was given).
    options[:training_ident] = training_ident.to_f
  end

  options[:hdd] = false
  opts.on( '-z', '--hdd', 'Write/use blast report on HDD' ) do
    options[:hdd] = true
  end

  # Set a banner, displayed at the top of the help screen.
  opts.banner = "\nUsage: full_lengther_next -f input.fasta -g [fungi|human|invertebrates|mammals|plants|rodents|vertebrates] [options]\n\n"

  # This displays the help screen
  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end
end

# parse options and remove from ARGV
optparse.parse!

# Both the fasta file and the taxon group are mandatory.
if options[:fasta].nil? || options[:tax_group].nil?
  puts "incorrect number of arguments, you need a fasta file and a taxonomical group:\n\n\t"
  puts optparse.help
  exit
end

###################################################################################################
# PREPARE ENVIROMENT
###################################################################################################

# Worker init script: taken from the FULL_LENGTHER_NEXT_INIT environment
# variable when it points to an existing path, otherwise 'init_env' next to
# this script.
if ENV['FULL_LENGTHER_NEXT_INIT'] && File.exist?(ENV['FULL_LENGTHER_NEXT_INIT'])
  FULL_LENGTHER_NEXT_INIT = File.expand_path(ENV['FULL_LENGTHER_NEXT_INIT'])
else
  FULL_LENGTHER_NEXT_INIT = File.join(ROOT_PATH,'init_env')
end

Dir.mkdir('temp') unless File.exist?('temp')

# Database location: an existing BLASTDB path wins, otherwise ROOT_PATH/blast_dbs.
if ENV['BLASTDB'] && File.exist?(ENV['BLASTDB'])
  formatted_db_path = ENV['BLASTDB']
else
  # otherwise use ROOTPATH + DB
  formatted_db_path = File.expand_path(File.join(ROOT_PATH, "blast_dbs"))
end

ENV['BLASTDB'] = formatted_db_path
puts "Using databases at: #{ENV['BLASTDB']}"

# NOTE(review): this check is gated on 'c' (testcode in the -a help text) while
# ncrna is selected with 'n' -- confirm whether 'n' was intended here.
ncrna_path = File.join(ENV['BLASTDB'],'nc_rna_db','ncrna.nhr')
if !File.exist?(ncrna_path) && options[:acess_db].include?('c')
  puts "DB File #{ncrna_path} doesn't exists"
  puts optparse.help
  exit
end

# The taxon-specific sp_<group> database is required when either 's' or 't'
# is selected.
if options[:acess_db].include?('s') || options[:acess_db].include?('t')
  sp_path = File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}.psq")
  unless File.exist?(sp_path)
    puts "DB File #{sp_path} doesn't exists, or"
    puts "incorrect taxon group name: #{options[:tax_group]} choose:"
    puts optparse.help
    exit
  end
end

##################################################################################################
# MAIN
###################################################################################################

# Project libraries are loaded only after the arguments have been validated.
require 'scbi_mapreduce'
require 'my_worker_manager_fln'
# First server
require 'reptrans'

$LOG = Logger.new(STDOUT)
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

main_path = File.dirname(ROOT_PATH)
custom_worker_file = File.join(main_path, 'lib','full_lengther_next','classes','my_worker.rb')

$LOG.info 'Starting server'

# initialize work manager (open files, etc)
MyWorkerManagerFln.init_work_manager(options)

# Create server
server = ScbiMapreduce::Manager.new(options[:server_ip], options[:port], options[:workers], MyWorkerManagerFln, custom_worker_file, STDOUT, FULL_LENGTHER_NEXT_INIT)
server.chunk_size = options[:chunk_size]

# launch server
server.start_server
$LOG.info 'Closing server'

# Representative transcriptome, only when -r was given.
unless options[:reptrans].nil?
  seqs_annotation_prot, seqs_some_coding, seqs_unknown = MyWorkerManagerFln.get_annotations()
  reptrans(seqs_annotation_prot, seqs_some_coding, seqs_unknown, options)
end

puts "\nGracias por utilizar Full-LengtherNEXT"