require 'scbi_blast'
require 'fl_analysis' #Allow call 'analiza_orf_y_fl'
require 'common_functions'

module ChimericSeqs
	BEG = 0
	STOP = 1		
	HIT = 2
	def search_chimeras(seq, blast_query, options, db_name, db_path)
		
		# DETECTION
		#----------------------
		homology_zones = []
		cut_positions = []
		if blast_query.hits.length > 1 
			homology_zones = define_homology_zones(blast_query, options, seq.seq_fasta)		
			cut_positions = set_cut_positions(homology_zones) if homology_zones.length > 1
		end
		# CONFIRMATION
		#----------------------
		num_homology_zones = homology_zones.length
		if num_homology_zones > 1 && options[:chimera].include?('r')
			confirm_chimeras(homology_zones, db_path, options[:ident_thresold]) # Check if prots are differents or not
			num_homology_zones = homology_zones.length
		end

		# SPLICING
		#--------------------
		new_seqs=[]		
		if num_homology_zones > 1 #In this case the sequence is a chimera
			seq.format_chimera!
			homology_zones.each_with_index do |hom_zone, i|
				seq.hit << hom_zone[HIT].first.dup #Save hit before modified it for write output purposes
				hit_limits = get_limits(hom_zone[HIT])# Take beginning and end of hit on query, hit can be composed by unsorted or antisense hsps
				if options[:chimera].include?('c') && hit_limits[STOP]-hit_limits[BEG]> options[:min_nucleotides] 
					new_seqs << fragment_chimera(blast_query, seq, hom_zone[HIT], i, hit_limits, num_homology_zones, options, db_name, cut_positions[i])
					seq.warnings('SOLVED')
				end
			end
		else
			new_seqs = nil #Sequence isn't chimera
		end
		return new_seqs
	end

	def set_cut_positions(homology_zones)
		cut_positions = []
		last_cut = -1
		homology_zones.each_with_index do |hom_zone, i|
			if i > 0
				positions = []
				positions << last_cut + 1 # Start of fragment
				cut_position = homology_zones[i-1][STOP] + (hom_zone[BEG] - homology_zones[i-1][STOP])/2
				positions << cut_position # End of fragment
				last_cut = cut_position
				cut_positions << positions
			end
		end
		cut_positions << [last_cut, homology_zones.last[HIT].first.q_len-1]
		return cut_positions		
	end

	def confirm_chimeras(homology_zones, db_path, ident_thresold)
		acc_hit = homology_zones.map{|zone| zone[HIT].first.acc}
		seq_fasta = %x[blastdbcmd -db #{db_path} -entry #{acc_hit.join(',')}]
		seq_fasta << ">remove\nALGO\n" #Needed for clustal-omega display the dist-matrix, requires unless 3 sequences to do it

		clustal_matrix = do_clustal(seq_fasta)
		clustal_matrix.shift #Remove header
		clustal_matrix.pop #Remove false sequence 
		
		clustal_hits = []
		distances = []
		clustal_matrix.each do |line|
			fields = line.split
			fields.pop #Remove data belong to false sequence
			fields.shift #Remove prot name
			distances << fields.map! {|field| field.to_f}	
		end
		delete_positions = search_ident_prots(homology_zones, distances, ident_thresold)
		delete_zones(delete_positions, homology_zones)
	end
	
	
	def search_ident_prots(homology_zones, distances, ident_thresold)
		delete_positions = []		
		n_homology_zones = homology_zones.length
		n_homology_zones.times do |j|
			n_homology_zones.times do |i|			
				next if i == j
				if distances[j][i] >= ident_thresold
					delete_positions << j
					delete_positions << i
				end
			end
		end
		delete_positions.uniq!
		return delete_positions
	end
	
	
	def fragment_chimera(query, seq, hit, hit_position, hit_limits, num_homology_zones, options, db_name, cut_positions)
		# Prepare new seq and query
		#----------------------------
		query_bak = query.dup
		query_bak.hits = hit # Here, hit is an array of hsps
		query_bak.query_def += "_split_#{hit_position}"
		seq_bak = seq.dup
		seq_bak.reset_classification
		seq_bak.clean_warnings
		seq_bak.seq_name += "_split_#{hit_position}"
		seq_bak.clean_orfs
		seq_bak.save_fasta = TRUE
		seq_bak.ignore = FALSE

		# Cut sequence and move hit/hsps limits
		#----------------------------------------
		if hit_position == 0 #First zone
			limit = 0
			if hit.first.q_frame < 0 #Hit reversed
				hit.first.q_frame = -1
			end
		else #Middle & last zone
			limit = cut_positions[BEG]#hit_limits[BEG]
			hit_move_limits(hit, -limit, 0) #Redefine hit limits on new sequence after cut	
			if hit.first.q_frame >= 0
				hit.first.q_frame=1
			elsif hit_position < num_homology_zones-1 #Last zone keeps his original frame because it's composed by the hit and the terminal sequence (Here hit is reversed).
				hit.first.q_frame=-1
			end
		end
		if hit_position == num_homology_zones-1 #Last zone
			seq_bak.seq_fasta = seq.seq_fasta[cut_positions[BEG]..seq.fasta_length-1]#[hit_limits[BEG]..seq.fasta_length-1]
		else	# Beginning & Middle zone
			seq_bak.seq_fasta = seq.seq_fasta[limit..cut_positions[STOP]]#[limit..hit_limits[STOP]]
		end
		seq_length = seq_bak.seq_fasta.length
		query_bak.full_query_length = seq_length
		seq_bak.fasta_length = seq_length
		hit_set_q_len(hit, seq_length)


		# Full length analisys of fragment
		#----------------------------------------	
		analiza_orf_y_fl(seq_bak, query_bak.hits, options, db_name)

		return seq_bak
	end
	
	def define_homology_zones(query, options, query_fasta)
		# Define hit limits
		#---------------------
		hits = cluster_query_hits(query) #Hsp packages
		hits_limits = define_hit_limits(hits)

		# Define homology zones
		#------------------------
		#First homology zone
		zones = [[hits_limits.first[BEG], hits_limits.first[STOP], hits.first]]
		ref_hit_beg = hits_limits.first[BEG]
		ref_hit_end = hits_limits.first[STOP]
		
		#Other homology zone
		hits_limits.each_with_index do |hit, i|
			coincidences = 0
			zones.each do |zone|
				if  hit_is_in?(zone[BEG], zone[STOP], hit) # Extender zona de homologia si coinciden en zona 
					zone[BEG] = [zone[BEG],hit[BEG]].min
					zone[STOP] = [zone[STOP],hit[STOP]].max
					coincidences+=1
				end
			end
			if coincidences == 0
				zones << [hit[BEG], hit[STOP], hits[i]]
			end
		end
		zones.sort!{|e1,e2| e1[BEG] <=> e2[BEG]}

		# Delete overlapping homology zones
		#------------------------------------
		overlapping_zones = overlapping_zones(zones)
		delete_zones(overlapping_zones, zones)

		return zones
	end

	def define_hit_limits(hits)
		limits=[]
		hits.each do |hit|
			limits << get_limits(hit) 
		end
		return limits
	end

	def get_limits(hit)
		coordenates=[]
		hit.map{|h| coordenates << h.q_beg; coordenates << h.q_end}	
		#		BEG				END		
 		limits=[coordenates.min, coordenates.max]
		return limits
	end

	def get_limits_s(hit)
		coordenates=[]
		hit.map{|h| coordenates << h.s_beg; coordenates << h.s_end}	
		#		BEG				END		
 		limits=[coordenates.min, coordenates.max]
		return limits
	end

	def cluster_query_hits(query)
		hits = []
		acc_hit = []
		query.hits.each do |hit|
			ind = acc_hit.index(hit.acc)
			if ind.nil?
				acc_hit << hit.acc
				hits << [hit]
			else
				hits[ind] << hit
			end
		end
		return hits		
	end

	def delete_zones(overlapping_zones, zones)
		overlapping_zones.each do |zone|
			zones.delete_at(zone)
		end
	end

	def overlapping_zones(zones)
		delete_zones=[]
		zones.each_with_index do |zone, i|
			if i>0
				if zone[BEG]< zones[i-1][STOP]
					delete_zones << i
					delete_zones << i-1	
				end
			end
		end
		delete_zones.uniq!
		return delete_zones
	end

	def hit_is_in?(h_beg, h_end, hit)
		is=FALSE
				# CONTIENE					#OVERLAP
		if h_beg <= hit[BEG] && h_end > hit[BEG] || hit[BEG] <= h_beg && hit[STOP] > h_beg
			is=TRUE
		end
		return is
	end
	
	def get_hits(query, ref_hit)
		all_hits=[]
		query.hits.each do |hit|
			if hit.acc == ref_hit.acc
				all_hits << hit
			end
		end
		return all_hits
	end 


	def min_distance_between_homology_zones(homology_zones)
		distance=nil
		homology_zones.each_with_index do |zone,i|
			if i > 0
				local_distance=homology_zones[i][BEG] - homology_zones[i-1][STOP]
				if distance.nil? || distance > local_distance
					distance=local_distance
				end	
			end
		end
		return distance
	end

	def duplicate_hits(query)
		dup_hits=[]
		query.hits.each do |hit|
			dup_hits << hit.dup
		end
		return dup_hits
	end
	
	def set_limits(hit, q_beg, q_end, s_beg, s_end)
		hit.q_beg = q_beg
		hit.q_end = q_end
		hit.s_beg = s_beg
		hit.s_end = s_end
	end

	def move_limits(hit, q_add, s_add)
		hit.q_beg+=q_add
		hit.q_end+=q_add
		hit.s_beg+=s_add
		hit.s_end+=s_add
		if hit.class.to_s == 'ExoBlastHit' && !hit.q_frameshift.empty? #There is frameshift
			hit.q_frameshift.map!{|fs| 
				[fs.first + q_add, fs.last]
			}
		end
	end
	
	def hit_move_limits(hit, q_add, s_add)
		if hit.class.to_s == 'Array'
			hit.each do |hsp|
				move_limits(hsp, q_add, s_add)
			end
		elsif hit.class.to_s == 'Hit'
			#puts "\e[35m#{hit.acc}\t#{hit.q_beg}\t#{hit.q_end}\t#{hit.s_beg}\t#{hit.s_end}\t#{hit.reversed}\e[0m"
			move_limits(hit, q_add, s_add)
		end
	end

	def hit_set_q_len(hit, q_len)
		hit.each do |hsp|
			hsp.q_len=q_len
		end
	end

	def do_clustal(seq_fasta)
		cmd='clustalo -i -  -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
		clustal_matrix = nil
		IO.popen(cmd,'w+') {|clustal|
			clustal.sync = TRUE
			clustal.write(seq_fasta)
			clustal.close_write
			clustal_matrix = clustal.readlines
			clustal.close_read
		}
		return clustal_matrix
	end
	
end