#!/usr/bin/env ruby

# 12-2-2011 Noe Fernandez Pozo.
# Full-LengtherNEXT predicts if your sequences are complete, showing you the nucleotide sequences and the translated protein
# Directory containing this executable; later sections build paths relative to it.
ROOT_PATH = File.dirname(__FILE__)

# Append the project's classes directory to the load path so that the
# plain `require` calls further down can resolve files located there.
classes_dir = File.expand_path(File.join(ROOT_PATH, "../lib/full_lengther_next/classes/"))
$LOAD_PATH.push(classes_dir)

require 'optparse'
require 'socket'

###############################################################################################
# PARSE OPTIONS
###############################################################################################
# Command-line settings, keyed by symbol. Each key receives its default value
# while the parser below is being built, so the hash is fully populated even
# before any argument is parsed.
options = {}

# Create the 'logs' directory in the current working directory unless that
# path already exists. File.exist? is used because File.exists? was removed
# in Ruby 3.2.
Dir.mkdir('logs') unless File.exist?('logs')

# Option parser definition. For every option the default is stored first and
# then a handler is registered that overrides it when the flag is supplied.
optparse = OptionParser.new do |opts|

  options[:acess_db] = 'stnp'
  opts.on( '-a', '--acess_db STRING', 'Select that databases is going to be used. s for Swissprot, t for trEMBL and n for ncrna, p for use Transdecoder and c for use testcode algothrim. By default is set to stnp' ) do |acess_db|
    options[:acess_db] = acess_db
  end

  options[:blast] = ''
  opts.on( '-b', '--blast STRING', 'Aditional options to blast execution' ) do |blast|
    options[:blast] = blast
  end

  options[:chunk_size] = 200
  opts.on( '-c', '--chunk_size SIZE', "Number of sequences processed in each block when parallelization is used. Default=200" ) do |s|
    options[:chunk_size] = s.to_i
  end

  options[:est_db] = nil
  opts.on( '-d', '--est_db FILE', "EST database for representative transcriptome\n" ) do |est_db|
    options[:est_db] = est_db
    # Abort with a failure status when the given path does not exist.
    unless File.exist?(options[:est_db])
      puts "No valid path to EST database"
      Process.exit(-1)
    end
  end

  # Exonerate analysis is enabled by default; passing the flag turns it off.
  options[:exonerate] = true
  opts.on( '-e', '--exonerate', 'Disables exonerate analysis' ) do |_exonerate|
    options[:exonerate] = false
  end

  options[:fasta] = nil
  opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
    options[:fasta] = file
  end

  options[:tax_group] = nil
  opts.on( '-g', '--taxon_group GROUP', "Taxon group, required to use the best databases:\n#{"\t"*5}fungi\n#{"\t"*5}human\n#{"\t"*5}invertebrates\n#{"\t"*5}mammals\n#{"\t"*5}plants\n#{"\t"*5}rodents\n#{"\t"*5}vertebrates\n\n" ) do |tax_name|
    options[:tax_group] = tax_name
  end

  options[:ident] = 45.00
  opts.on( '-i', '--identity_percent IDENTITY', 'identity percent threshold to consider as reliable the sequence similarity. Default=45.00' ) do |ident|
    options[:ident] = ident.to_f
  end

  # High clustering is enabled by default; passing the flag turns it off.
  options[:high_clustering] = true
  opts.on( '-k', '--high_clustering', 'Only for representative transcriptome. Add a clustering step using pfam ids. Default true' ) do
    options[:high_clustering] = false
  end

  # Stored as a fraction (default 0.25); the command-line value is a
  # percentage and is therefore divided by 100.
  options[:subject_coverage] = 0.25
  opts.on( '-j', '--subject_coverage_percent FLOAT', "Subject coverage percentage threshold" ) do |j|
    options[:subject_coverage] = j.to_f/100
  end

  options[:min_nucleotides] = 100
  opts.on( '-n', '--min_nucleotides minLONG', "min nucleotides to consider a part of chimera like putative unigene. Default=100\n\n" ) do |min_nucleotides|
    options[:min_nucleotides] = min_nucleotides.to_i
  end

  options[:distance] = 15
  opts.on( '-m', '--max_distance maxDIST', "maximal distance between query and subject gene boundaries to be qualified as putative, the less distance the more strict. Default=15\n\n" ) do |distance|
    options[:distance] = distance.to_i
  end

  options[:port] = 0 #50000
  opts.on( '-p', '--port PORT', "Server port\n\n" ) do |port|
    options[:port] = port.to_i
  end

  # The help text now reports the default that is actually assigned here ('rc').
  options[:chimera] = 'rc'
  opts.on( '-q', '--chimera_detection STRING', "d for deactivate chimera detection mode, s for search chimeras only, r for revise it and c for cut it. Default = rc \n\n" ) do |chimera|
    # Non-destructive downcase: the caller's argument string is left untouched.
    options[:chimera] = chimera.downcase
  end

  options[:reptrans] = nil
  opts.on( '-r', '--representative_transcriptome', "Generates a fasta file with the minime transcriptome\n" ) do |reptrans|
    options[:reptrans] = reptrans
  end

  options[:server_ip] = '0.0.0.0'
  opts.on( '-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface' ) do |server_ip|
    # Pick the first local IPv4 address that starts with the given (possibly
    # partial) ip; fall back to '0.0.0.0' when no address matches.
    ipv4_addresses = Socket.ip_address_list.select(&:ipv4?).map(&:ip_address)
    matching_ip = ipv4_addresses.find { |address| address.start_with?(server_ip) }
    options[:server_ip] = matching_ip || '0.0.0.0'
  end

  options[:ident_thresold] = 55.0
  opts.on( '-t', '--identity_thresold FLOAT', "For chimeras only. Min identity to consider that two proteins are the same. Default=55.0\n\n" ) do |ident_thresold|
    # The option is declared as FLOAT and defaults to 55.0, so keep the
    # fractional part instead of truncating it with to_i.
    options[:ident_thresold] = ident_thresold.to_f
  end

  options[:user_db] = nil
  opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
    options[:user_db] = db
    # The database is looked up through its '.psq' file.
    unless File.exist?(File.expand_path(options[:user_db])+'.psq')
      puts "user database: #{options[:user_db]} was not found"
      exit
    end
  end

  options[:verbose] = 0
  opts.on( '-v', '--verbose INTEGER', 'Show extra info' ) do |verbose|
    options[:verbose] = verbose.to_i
  end

  options[:workers] = 2
  opts.on( '-w', '--workers INTEGER/FILE', 'Number of CPUs, or a file containing machine names to launch workers with ssh' ) do |workers|
    if File.exist?(workers)
      # Workers file: one entry per line. The first entry is discarded.
      # NOTE(review): presumably the first line names the manager host - confirm.
      options[:workers] = File.read(workers).split("\n").map(&:chomp)
      options[:workers].shift
    elsif workers.to_i > 0
      options[:workers] = workers.to_i
    else
      # Neither an existing file nor a positive integer: use the default.
      options[:workers] = 2
    end
  end

  options[:training_ident] = 45.00
  opts.on( '-x', '--training_identity_percent IDENTITY', 'identity percent threshold to use a complete sure sequence for Transdecoder training. Default=45.00' ) do |training_ident|
    # Use this handler's own argument; the previous code referenced an
    # undefined local (`ident`) and raised NameError whenever -x was given.
    options[:training_ident] = training_ident.to_f
  end

  options[:hdd] = false
  opts.on( '-z', '--hdd', 'Write/use blast report on HDD' ) do |_hdd|
    options[:hdd] = true
  end

  # Set a banner, displayed at the top of the help screen.
  opts.banner = "\nUsage: full_lengther_next -f input.fasta -g [fungi|human|invertebrates|mammals|plants|rodents|vertebrates] [options]\n\n"

  # This displays the help screen
  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end

end

# Consume the recognised switches from ARGV, filling in `options`.
optparse.parse!

# Both the fasta input and the taxonomical group are mandatory: when either
# is still nil after parsing, print the usage text and stop.
missing_required = [:fasta, :tax_group].any? { |key| options[key].nil? }
if missing_required
  puts "incorrect number of arguments, you need a fasta file and a taxonomical group:\n\n\t"
  puts optparse.help
  exit
end

###################################################################################################
# PREPARE ENVIRONMENT
###################################################################################################
# Location of the init file handed to the server further down.
# When the FULL_LENGTHER_NEXT_INIT environment variable names an existing
# path, its expanded form is used; otherwise 'init_env' next to this script.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if ENV['FULL_LENGTHER_NEXT_INIT'] && File.exist?(ENV['FULL_LENGTHER_NEXT_INIT'])
  FULL_LENGTHER_NEXT_INIT = File.expand_path(ENV['FULL_LENGTHER_NEXT_INIT'])
else
  FULL_LENGTHER_NEXT_INIT = File.join(ROOT_PATH, 'init_env')
end

# Create the 'temp' directory in the current working directory unless that
# path already exists. File.exist? is used because File.exists? was removed
# in Ruby 3.2.
Dir.mkdir('temp') unless File.exist?('temp')

# Resolve the directory holding the formatted BLAST databases.
# An existing path in the BLASTDB environment variable takes precedence;
# otherwise fall back to the 'blast_dbs' directory next to this script.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if ENV['BLASTDB'] && File.exist?(ENV['BLASTDB'])
  formatted_db_path = ENV['BLASTDB']
else
  formatted_db_path = File.expand_path(File.join(ROOT_PATH, "blast_dbs"))
end

# Write the resolved path back to the environment; the checks that follow
# read it from ENV['BLASTDB'].
ENV['BLASTDB'] = formatted_db_path
puts "Using databases at: #{ENV['BLASTDB']}"

# Verify that the databases required by the selected modes are present,
# printing the usage text and stopping otherwise.
# File.exist? is used because File.exists? was removed in Ruby 3.2.

# ncRNA database check.
# NOTE(review): this check is gated on the 'c' flag, while the -a help text
# maps ncrna to 'n' and testcode to 'c' - confirm which flag is intended.
ncrna_path = File.join(ENV['BLASTDB'],'nc_rna_db','ncrna.nhr')
if !File.exist?(ncrna_path) && options[:acess_db].include?('c')
  puts "DB File #{ncrna_path} doesn't exists"
  puts optparse.help
  exit
end

# Protein database check: needed when Swissprot ('s') or trEMBL ('t') is
# selected. The path is built from the taxon group, so a missing file may
# also mean the group name is wrong.
if options[:acess_db].include?('s') || options[:acess_db].include?('t')
  sp_path = File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}.psq")
  unless File.exist?(sp_path)
    puts "DB File #{sp_path} doesn't exists, or"
    puts "incorrect taxon group name: #{options[:tax_group]} choose:"
    puts optparse.help
    exit
  end
end

##################################################################################################
# MAIN  
###################################################################################################

# Logger is instantiated below; require it explicitly instead of relying on
# one of the project libraries loading it as a side effect.
require 'logger'
require 'scbi_mapreduce'
require 'my_worker_manager_fln' #First server
require 'reptrans'

# Global logger writing to standard output.
$LOG = Logger.new(STDOUT)
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

# Worker implementation file, located under lib/ one level above this
# script's directory; its path is handed to the server.
main_path = File.dirname(ROOT_PATH)
custom_worker_file = File.join(main_path, 'lib','full_lengther_next','classes','my_worker.rb')

$LOG.info 'Starting server'

# initialize work manager (open files, etc)
MyWorkerManagerFln.init_work_manager(options)

# Create server
server = ScbiMapreduce::Manager.new(options[:server_ip], options[:port], options[:workers], MyWorkerManagerFln, custom_worker_file, STDOUT, FULL_LENGTHER_NEXT_INIT)
server.chunk_size = options[:chunk_size]

# launch server
# NOTE(review): presumably this call returns only once all work is done - confirm.
server.start_server

$LOG.info 'Closing server'

# Representative transcriptome step, run only when -r was given.
unless options[:reptrans].nil?
  seqs_annotation_prot, seqs_some_coding, seqs_unknown = MyWorkerManagerFln.get_annotations
  reptrans(seqs_annotation_prot, seqs_some_coding, seqs_unknown, options)
end
puts "\nGracias por utilizar Full-LengtherNEXT"