#!/usr/bin/env ruby

# 12-2-2011 Noe Fernandez Pozo.
# Full-LengtherNEXT predicts if your sequences are complete, showing you the nucleotide sequences and the translated protein

#------------------------------------------------------------------ parameters entry
require 'optparse'
require 'socket'

# Command-line options. Every key is pre-populated with its default inside the
# OptionParser block below and overwritten by the handler of the matching switch.
options = {}

# Make sure a 'logs' directory exists in the current working directory.
# File.exist? is used because the File.exists? alias was removed in Ruby 3.2.
Dir.mkdir('logs') unless File.exist?('logs')

optparse = OptionParser.new do |opts|
  # Input fasta file (mandatory; checked after parsing).
  options[:fasta] = nil
  opts.on('-f', '--fasta FILE', 'Fasta input file') do |file|
    options[:fasta] = file
  end

  # Taxon group (mandatory; checked after parsing).
  options[:tax_group] = nil
  opts.on('-g', '--taxon_group GROUP', "Taxon group, required to use the best databases:\n\t\t\t\t\tfungi\n\t\t\t\t\thuman\n\t\t\t\t\tinvertebrates\n\t\t\t\t\tmammals\n\t\t\t\t\tplants\n\t\t\t\t\trodents\n\t\t\t\t\tvertebrates\n\n") do |tax_name|
    options[:tax_group] = tax_name
  end

  # Optional user-supplied blast plus database.
  options[:user_db] = nil
  opts.on('-d', '--blast_db DB_NAME', 'User blast plus database') do |db|
    options[:user_db] = db
  end

  # options[:verbose] = nil
  # opts.on( '-v', '--verbose_mode', "verbose mode\n\n" ) do |verbose|
  # 	options[:verbose] = verbose
  # end

  options[:evalue] = 1.0e-25
  opts.on('-e', '--evalue EVALUE', 'e value threshold to consider as reliable the orthologue sequence. Default=1.0e-25') do |evalue|
    options[:evalue] = evalue.to_f
  end

  options[:ident] = 45.00
  opts.on('-i', '--identity_percent IDENTITY', 'identity percent threshold to consider as reliable the sequence similarity. Default=45.00') do |ident|
    options[:ident] = ident.to_f
  end

  options[:distance] = 15
  opts.on('-a', '--aas_distance DISTANCE', "distance threshold in aminoacids used for some calculations, the less distance the more strict. Default=15\n\n") do |distance|
    options[:distance] = distance.to_i
  end

  # Workers: either a positive CPU count or a file listing machine names.
  options[:workers] = 2
  opts.on('-w', '--workers INTEGER/FILE', 'Number of CPUs, or a file containing machine names to launch workers with ssh') do |workers|
    if File.file?(workers)
      # Regular file (directories are deliberately not accepted, they cannot
      # be read): one machine name per line.
      machines = File.read(workers).split("\n").map(&:chomp)
      # NOTE(review): the first entry of the file is discarded, exactly as
      # before - presumably a header line or the manager host; confirm.
      machines.shift
      options[:workers] = machines
    elsif workers.to_i > 0
      options[:workers] = workers.to_i
    else
      # Neither a readable file nor a positive integer: keep the default.
      options[:workers] = 2
    end
  end

  options[:chunk_size] = 200
  opts.on('-c', '--chunk_size SIZE', "Number of sequences processed in each block when parallelization is used. Default=200") do |s|
    options[:chunk_size] = s.to_i
  end

  # Server address: the argument is matched as a prefix against the local
  # IPv4 addresses, so a partial ip selects the interface.
  options[:server_ip] = '0.0.0.0'
  opts.on('-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface') do |server_ip|
    ipv4_addresses = Socket.ip_address_list.select(&:ipv4?).map(&:ip_address)
    matching_ip = ipv4_addresses.find { |address| address.start_with?(server_ip) }

    # Fall back to '0.0.0.0' when no local address matches.
    options[:server_ip] = matching_ip || '0.0.0.0'
  end

  options[:port] = 0 # 50000
  opts.on('-p', '--port PORT', "Server port\n\n") do |port|
    options[:port] = port.to_i
  end

  # Set a banner, displayed at the top of the help screen.
  opts.banner = "Usage: full_lengther_next -f input.fasta -g [fungi|human|invertebrates|mammals|plants|rodents|vertebrates] [options]\n\n"

  # This displays the help screen
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end

# Parse the command line, removing the recognised switches from ARGV.
# A malformed command line (unknown switch, missing argument, ...) is reported
# together with the usage text instead of ending in an uncaught exception.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  puts e.message
  puts optparse.help
  exit 1
end
# @verbose = options[:verbose]

# if (!@verbose.nil?)
# 	puts "You have chosen the verbose mode:\n\nInput File:\t#{options[:fasta]}\nTaxon Group:\t#{options[:tax_group]}\nOwn Database:\t#{options[:user_db]}\nCPU Number:\t#{options[:workers]}"
# end

#----------------------------------------------------------------------- testing errors in parameters entry
# Both the fasta file and the taxonomical group are mandatory. Exit with a
# non-zero status so that calling scripts can detect the usage error.
if options[:fasta].nil? || options[:tax_group].nil?
  puts "incorrect number of arguments, you need a fasta file and a taxonomical group:\n\n\t"
  puts optparse.help
  exit 1
end
#----------------------------------------------------------------------- loading classes and gems
# Directory containing this script; used as the base for the default paths below.
ROOT_PATH = File.dirname(__FILE__)

# $: << File.expand_path(File.join(ROOT_PATH, "classes"))

# load gem path, only to test locally
# $: << File.expand_path('~/progs/ruby/gems/full_lengther_next/lib')

require 'full_lengther_next'

# Init file: taken from the FULL_LENGTHER_NEXT_INIT environment variable when
# it points to an existing path, otherwise 'init_env' next to this script.
# (File.exist? replaces File.exists?, which was removed in Ruby 3.2.)
init_from_env = ENV['FULL_LENGTHER_NEXT_INIT']
FULL_LENGTHER_NEXT_INIT =
  if init_from_env && File.exist?(init_from_env)
    File.expand_path(init_from_env)
  else
    File.join(ROOT_PATH, 'init_env')
  end

# Blast databases: honour BLASTDB when it points to an existing path,
# otherwise fall back to ROOT_PATH/blast_dbs.
blastdb_from_env = ENV['BLASTDB']
formatted_db_path =
  if blastdb_from_env && File.exist?(blastdb_from_env)
    blastdb_from_env
  else
    File.expand_path(File.join(ROOT_PATH, "blast_dbs"))
  end

# Export the chosen location through the BLASTDB environment variable.
ENV['BLASTDB'] = formatted_db_path
puts "Using databases at: #{ENV['BLASTDB']}"

# The non-coding RNA database must be present; abort with a failure status otherwise.
ncrna_path = File.join(ENV['BLASTDB'], 'nc_rna_db', 'ncrna_fln_100.fasta.nhr')
unless File.exist?(ncrna_path)
  puts "DB File #{ncrna_path} doesn't exists"
  puts optparse.help
  exit 1
end

# The database for the requested taxon group must be present; a missing file
# usually means a misspelled group name.
sp_path = File.join(ENV['BLASTDB'], "sp_#{options[:tax_group]}", "sp_#{options[:tax_group]}.fasta.psq")
unless File.exist?(sp_path)
  puts "DB File #{sp_path} doesn't exists, or"
  puts "incorrect taxon group name: #{options[:tax_group]} choose:"
  puts optparse.help
  exit 1
end

require 'logger' # stdlib; required explicitly because Logger is instantiated below
require 'scbi_blast' # is a gem
require 'scbi_mapreduce'
# puts $:
require 'fl_string_utils'
require "une_los_hit"
require "lcs" # like the class similar of seqtrim, return the longest common sequence
require "test_code"

##########################################################  MAIN  #################################################################

require 'my_worker_manager'

# Global logger writing to standard output.
$LOG = Logger.new(STDOUT)
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

# Worker implementation file handed to the manager below.
custom_worker_file = File.join(ROOT_PATH, 'classes', 'my_worker.rb')

$LOG.info 'Starting server'
# initialize work manager (open files, etc)
MyWorkerManager.init_work_manager(options, options[:chunk_size])

# Create server
server = ScbiMapreduce::Manager.new(options[:server_ip], options[:port], options[:workers], MyWorkerManager, custom_worker_file, STDOUT, FULL_LENGTHER_NEXT_INIT)
server.chunk_size = options[:chunk_size]
# launch server
server.start_server

$LOG.info 'Closing server'

puts "\nGracias por utilizar Full-LengtherNEXT"