require 'une_los_hit'

module FlAnalysis
	
	def analiza_orf_y_fl(seq, blast_query, options, db_name)
		aas_n_end = options[:distance]
		pident_threshold = options[:ident]
		evalue_threshold = options[:evalue]
		# @verbose = options[:verbose]

		# test_blast_hits(blast_query)
		
		# used to detect if the sequence and the blast are from different query
		if seq.seq_name != blast_query.query_def
			raise "BLAST query name and sequence are different"
		end
		
		q=blast_query
		msgs = ''
		atg_status = ''
		end_status = ''
		final_status = ''
		
		# the fasta sequence is saved
		query_fasta = seq.seq_fasta

		if q.hits[0].nil? # There is no match in blast, the seq go to the next DB
			# puts "#{db_name} -- #{q.query_def} --> NO BLASTX match"
			
			# If the DB is trembl and the seq has annotations from other DB the annotations must be printed
			if (db_name =~ /^tr_/)
				if (seq.get_annotations(:tmp_annotation).empty?)
					if (seq.sec_desc.empty?)
						seq.annotate(:tcode,'')
					else
						seq.annotate(:tmp_annotation,[seq.sec_desc, '','',''],true)
					end
				else
					save_last_db_annotations(seq)
				end
			end
			
			return
		end
#----------------------------------------------------------------------------------------------------------
		warnings = ''
		errors = ''
		wrong_seq = false

		# if the sequence has more than one hit, the frames are checked and fixed to get an single hit
		if (q.hits.count > 1)
			
			seq_unida = UneLosHit.new(q, query_fasta, pident_threshold)
			
			wrong_seq = seq_unida.wrong_seq
			is_ok = seq_unida.is_ok
			q_index_start = seq_unida.q_index_start
			full_prot = seq_unida.full_prot
			
			query_fasta = seq_unida.output_seq # repaired fasta
			
			final_hit = seq_unida.final_hit # single hit
			msgs = seq_unida.msgs # warning messages
			x_number = seq_unida.number_x # number of nucleotides used to fix frame errors
			
		else # if there is only one hit

			if (q.hits[0].q_frame.to_i < 0) # si la secuencia esta al reves le damos la vuelta
				(query_fasta, q.hits[0].q_frame, q.hits[0].q_beg, q.hits[0].q_end) = reverse_seq(query_fasta, q.hits[0].q_frame, q.hits[0].q_beg, q.hits[0].q_end)
				q.hits[0].reversed = true
			end
			
			final_hit = q.hits[0] # single hit
			x_number = 0 # number of nucleotides used to fix frame errors
			
			full_prot = query_fasta[final_hit.q_frame-1, query_fasta.length+1].translate
			(is_ok, q_index_start) = contenidos_en_prot(final_hit, full_prot, q)
		end
		# test_final_hit(final_hit, query_fasta)
#----------------------------------------------------------------------------------------------------------
		if wrong_seq
			warnings = "ERROR#1, contains sense and antisense hits!!!, putative chimeric sequence, " + warnings
			# puts "ERROR#1, contains sense and antisense hits!!!, putative chimeric sequence"
			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#1\tcontains sense and antisense hits!!!, putative chimeric sequence, "
			error_log(q, seq, warnings, db_name)
			return
		end
		#----------------------------------------------------------------------------------------------------------
		warnings += msgs
		msgs = ''
		#----------------------------------------------------------------------------------------------------------
		if (x_number < 0)
			warnings = "ERROR#2, unexpected negative index in x_number, " + warnings
			# puts "ERROR#2, unexpected negative index in x_number"
			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#2\tunexpected negative index in x_number, "
			error_log(q, seq, warnings, db_name)
			return
		end
		#----------------------------------------------------------------------------------------------------------
		if (!is_ok)
			warnings = "ERROR#3, very serious frame error, " + warnings
			# puts "#{q.query_def} ERROR#3, hit was NOT found in the protein"
			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#3\thit was NOT found in the protein, "
			# error_log(q, seq, warnings, db_name)
			# return
		end
#----------------------------------------------------------------------------------------------------------
		fiable = false
		if ((final_hit.ident >= pident_threshold) && (final_hit.e_val <= evalue_threshold))
			fiable = true
		end
		# if the query protein is large enough at the start of the sequence should have the start codon
		if (final_hit.q_beg/3 + aas_n_end >= final_hit.s_beg.to_i) 
			substring = full_prot[0, q_index_start + 10]
			resto_substring = full_prot[q_index_start + 10, full_prot.length - q_index_start - 10]

			# to look for the beginning of the protein
			(m_substring, atg_status, msgs) = find_start(final_hit.s_beg, substring, fiable, aas_n_end)

			# pasting the substring sequence with the rest of the sequence
			tmp_prot = "#{m_substring}#{resto_substring}"
			# to get the value of the start_ORF index
			final_hit.q_beg = final_hit.q_beg.to_i - ((m_substring.length - 10) * 3)
		else
			# if (@verbose)
				# puts "beginning too short!"
			# end

			atg_status = 'incomplete'
			substring = full_prot[0, q_index_start]
			distance_s_atg = (final_hit.s_beg.to_i - final_hit.q_beg/3) + 1

			if (substring.rindex('*'))
				warnings += "Unexpected stop codon in the beginning of your sequence, "
				# if (@verbose)
					# puts "#{db_name} -- #{q.query_def} --> Unexpected stop codon in the beginning of your sequence"
				# end
			end

			final_hit.q_beg = final_hit.q_beg.to_i - (substring.length * 3)
			tmp_prot = full_prot
		end
#----------------------------------------------------------------------------------------------------------
		# look for the end of the protein
		(resto_substring, end_substring, end_status, warnings, putative_end) = find_end(final_hit, q, full_prot, tmp_prot, end_status, warnings, aas_n_end)
#----------------------------------------------------------------------------------------------------------
		final_prot = "#{resto_substring}#{end_substring}"
		
		warnings += msgs
		
		# to get the value of the end_ORF index
		if (atg_status == 'complete')
			final_hit.q_end = final_hit.q_beg - 3 + (final_prot.length * 3)
		else
			if (putative_end)
				final_hit.q_end = final_hit.q_end - 45 + (putative_end*3)
			end
		end
		
#--------------------------------------------------------------------------------------------------------------
		# decide the sequence status (Complete, Putative Complete, Internal, N-terminus, Putative N-terminus, C-terminus)
		final_status = determine_status(atg_status,end_status)
		#----------------------------------------------------------------------------------------------------------
		if (final_prot.length - 2*aas_n_end > final_hit.full_subject_length)
			warnings += " your sequence is longer than subject: #{final_prot.length} - #{final_hit.full_subject_length}"

		elsif (final_prot.length + aas_n_end < final_hit.full_subject_length)
			warnings += " your sequence is shorter than subject: #{final_prot.length} - #{final_hit.full_subject_length}"
			if (final_prot.length + 100 < final_hit.full_subject_length) || (final_prot.length*2 < final_hit.full_subject_length)
				
				if (final_status == 'Complete')
					final_status = 'Putative Complete'
					warnings += ". Was predicted as Complete, but is very much shorter than de subject"
					# if (@verbose)
					# 	puts "#{db_name} -- #{q.query_def} --> your sequence is 100 aas shorter than the subject or shorter than the half length of the subject"
					# end
				end
			end
		end
		
		# test_final_hit(final_hit, query_fasta)
		print_annotations(seq, q, final_hit, final_status, final_prot, warnings, query_fasta, db_name)
		
	end


	def test_blast_hits(q)
		
			puts "query_def: #{q.query_def} full_query_length: #{q.full_query_length} ------------------------------------------------"
			
			q.hits.each do |h|
				puts "\t subject_id: #{h.acc}"
				puts "\t acc: #{h.acc}"
				puts "\t full_subject_length: #{h.full_subject_length}"
				puts "\t q_beg: #{h.q_beg + 1}"
				puts "\t q_end: #{h.q_end + 1}"
				puts "\t q_frame: #{h.q_frame}"
				puts "\t s_beg: #{h.s_beg + 1}"
				puts "\t s_end: #{h.s_end + 1}"
				puts "\t s_frame: #{h.s_frame}"
				puts "\t align_len: #{h.align_len}"
				puts "\t gaps: #{h.gaps}"
				puts "\t mismatches: #{h.mismatches}"
				puts "\t reversed: #{h.reversed}"
				puts "\t score: #{h.score}"
				puts "\t bit_score: #{h.bit_score}"
				puts "\t ident: #{h.ident}"
				puts "\t e_val: #{h.e_val}"
				puts "\t definition: #{h.definition}"
				puts "\t q_seq: #{h.q_seq}"
				puts "\t s_seq: #{h.s_seq}"
				
			end

	end


	def test_final_hit(final_hit, query_fasta)
		
		puts "\t acc: #{final_hit.acc}"
		puts "\t full_subject_length: #{final_hit.full_subject_length}"

		puts "\n\t q_frame: #{final_hit.q_frame}"
		puts "\t reversed: #{final_hit.reversed}"
		
		puts "\n\t q_beg-q_end: #{final_hit.q_beg + 1} - #{final_hit.q_end + 1}"
		puts "\t s_beg - s_end: #{final_hit.s_beg + 1} - #{final_hit.s_end + 1}"

		puts "\n\t score: #{final_hit.score}, bit_score: #{final_hit.bit_score}, ident: #{final_hit.ident}, e_val: #{final_hit.e_val}"

		puts "\n\t definition: #{final_hit.definition}"
		puts "\t q_seq: #{final_hit.q_seq}"
		puts "\t s_seq: #{final_hit.s_seq}"
		
		puts "\nnt q_beg-q_end\n#{query_fasta[final_hit.q_beg..final_hit.q_end]}"
		puts "\n\nprot q_beg-q_end\n#{query_fasta[final_hit.q_beg..final_hit.q_end].translate}"
		
	end


	def error_log(q, seq, warnings, db_name)
		# seq.annotate(:error,"#{q.query_def}\t#{warnings}\t#{q.hits[0].definition}")
		
		if (db_name =~ /^tr_/)
			if (seq.get_annotations(:tmp_annotation).empty?)
				if (seq.sec_desc.empty?)
					if (!q.hits[0].definition.nil?)
						warnings = "Coding sequence with some errors, #{warnings}"
						seq.sec_desc = "#{q.query_def}\t#{seq.seq_fasta.length}\t#{q.hits[0].acc}\t#{db_name}\tCoding Seq\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
						seq.annotate(:tmp_annotation,[seq.sec_desc, '','',''],true)
					else
						seq.annotate(:tcode,'')
					end
				else
					warnings = "Coding sequence with some errors, #{warnings}"
					tmp_annot = seq.sec_desc.sub('my_warning',"#{warnings}")
					seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
				end
			else
				save_last_db_annotations(seq)
			end
		else
			if (seq.sec_desc.empty?)
				if (!q.hits[0].definition.nil?)
					warnings = "Coding sequence with some errors, #{warnings}"
					seq.sec_desc = "#{q.query_def}\t#{seq.seq_fasta.length}\t#{q.hits[0].acc}\t#{db_name}\tCoding Seq\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
				end
			end
		end
		
	end


	def save_last_db_annotations(seq)
	
		# puts "sequence not complete! recovering annotations from previous database! sldba!!"
		(q, final_hit, final_prot, query_fasta, final_status) = seq.get_annotations(:tmp_annotation).first[:message][3]
		print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)

		(name,fasta_length,acc,db_name,final_status,testcode,e_val,ident,my_length,subject_length,warnings,q_frame,q_beg,q_end,s_beg,s_end,description,final_prot) = seq.get_annotations(:tmp_annotation).first[:message][0].split("\t")
		if (final_hit.reversed) 
			(kk, q_frame, q_end, q_beg) = reverse_seq(query_fasta, q_frame.to_i, q_beg.to_i, q_end.to_i)
		end
	
		seq.annotate(:protein,seq.get_annotations(:tmp_annotation).first[:message][1])
		seq.annotate(:alignment,seq.get_annotations(:tmp_annotation).first[:message][2])
		tmp_annot = "#{name}\t#{fasta_length}\t#{acc}\t#{db_name}\t#{final_status}\t\t#{e_val}\t#{ident}\t#{my_length}\t#{subject_length}\t#{warnings}\t#{q_frame}\t#{q_beg}\t#{q_end}\t#{s_beg}\t#{s_end}\t#{description}\t#{final_prot}"
		seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
	
	end


	def find_start(subject_start, substring, fiable, aas_n_end)
		
		tmp_prot = ''
		msgs = ''
		atg_status = 'incomplete' # complete, incomplete or putative

		# puts "\nsubstring (#{substring.length} aas):\n#{substring}"
		stop_codon = substring.rindex('*')
		
		# marcamos la distancia al s_beg desde el principio del substring
		# s_beg_distance = (substring.length) - subject_start
		s_beg_distance = (substring.length - 10) - subject_start
		# marcamos la distancia al s_beg desde el final del substring
		atg_distance = (subject_start + 1) - (substring.length - 10)
		if (atg_distance <= 0) 
			atg_distance = 0
		else
			# puts "expected atg_distance = 0, your sequence atg_distance = #{atg_distance}; limit (1-15)"
			msgs = "atg_distance in limit (1-15): atg_distance = #{atg_distance}, "
		end
		
		# puts "s_beg_distance:#{s_beg_distance}, stop_codon: #{stop_codon}, subject_start: #{subject_start + 1}, atg_distance: #{atg_distance}"
		#----------------------------------------------------------------------------------------------------------
		# tenemos un codon de parada en el substring 5 prima
		if (stop_codon)
			stop_codon += 1
			# ahora vamos a ver si el stop esta antes o despues del s_beg
			if (stop_codon <= s_beg_distance) # esta antes
				substring = substring[stop_codon, substring.length - stop_codon]
				# puts "\nhay un codon de parada en el substring (#{substring.length} aas)\tstop_codon:#{stop_codon +1}\n#{substring}\n\n"
				
				first_m = substring.index('M')
				
				if (first_m) # tenemos M y stop ---------------------------------------------------------------------------
					substring = substring[first_m, substring.length - first_m]
					
					atg_status = 'complete'
				else # con STOP pero sin M --------------------------------------------------------------------------------
					atg_status = 'putative'
					# puts "there is not a start codon near the expected beginning of your sequence, distance to subject ATG= #{atg_distance} aas --> good simil: #{fiable}"
					msgs += "W1: There is no M at the beginning, "
				end
				#----------------------------------------------------------------------------------------------------------
			else # esta despues, un cambio de fase impide analizar el principio
				substring = substring[stop_codon, substring.length - stop_codon] # comentar?
				first_m = substring.index('M') # comentar?
				if (first_m) # tenemos M y unexpected stop # comentar?
					substring = substring[first_m, substring.length - first_m] # comentar?
				end # comentar?
				# TODO esto se puede cambiar!
				atg_status = 'putative'
				msgs += " Unexpected STOP codon in 5 prime region, "
				# puts "\nhay un codon de parada inesperado en el substring (#{substring.length} aas)\tstop_codon:#{stop_codon}, s_beg_distance: #{s_beg_distance +1}, atg_distance: #{atg_distance}"
			end
			#---------------------------------------------------------------------------------------------------------------
		else # no hay stop codon
			first_m = substring.index('M')
			if (first_m) # tenemos M, sin stop
				m_distance = subject_start - (substring.length - 10 - first_m)
				substring = substring[first_m, substring.length - first_m]
				# m_distance = [first_m+1,s_beg_distance].max - [first_m+1,s_beg_distance].min
				
				if (m_distance > aas_n_end*2) # sin STOP, con atg pero muy lejos del inicio que marca el subject ---------------
					# puts "No stop codon before M and M found is too far from subject M, distance to subject ATG= #{m_distance} aas --> good simil: #{fiable}"
					msgs += "No stop codon before M and M found is too far from subject M, "
					atg_status = 'incomplete'
				else
					if (fiable) # Tenemos M y aunque no hay STOP condon el ortologo es fiable ----------------------------------
						# msgs += "No stop codon before M but high homology subject, "
						atg_status = 'complete'
					else # Tenemos M pero no tenemos stop y el ortologo no es fiable -------------------------------------------
						# puts "No stop codon before M and low homology subject, distance to subject ATG= #{m_distance} aas --> good simil: #{fiable}"
						msgs += "No stop codon before M and low homology subject, "
						atg_status = 'putative'
					end
				end
			else # sin M ni STOP -------------------------------------------------------------------------------------------
				atg_status = 'putative'
				# puts "your sequence has the subject beginning but there is not start codon at the beginning, distance to subject ATG= #{atg_distance} aas --> good simil: #{fiable}"
				msgs += "W2: There is no M at the beginning, "
			end
		end
		
		return [substring, atg_status, msgs]
		
	end


	def find_end(final_hit, q, full_prot, tmp_prot, end_status, warnings, aas_n_end)
		# aqui vemos lo que queda sin similitud hasta el final
		s_end_resto = (final_hit.full_subject_length - (final_hit.s_end.to_i + 1)) # en el subject, numero de aas que necesito cubrir
		q_end_resto = (q.full_query_length.to_i - final_hit.q_end.to_i)/3 # en el query, numero de aas que tengo
		sq_end_distance = q_end_resto - s_end_resto
		
		cut_in_5p = full_prot.length - tmp_prot.length

		resto_substring = tmp_prot[0..final_hit.q_end/3 - cut_in_5p - 16]
		end_substring  =  tmp_prot[final_hit.q_end/3 - cut_in_5p - 15..tmp_prot.length]
		putative_end = end_substring.index('*')
		
		# si no tenemos suficiente secuencia para tener el stop (nos faltan 15 aas o mas)
		if (sq_end_distance + aas_n_end < 0)
			end_status = 'incomplete'
			if (putative_end)
				warnings += " Unexpected STOP codon at 3' end. Distance to subject end: #{sq_end_distance.abs} aas, "
				end_substring = end_substring[0, putative_end+1] # comentar?
				# if (@verbose)
				# 	puts "#{db_name} -- #{q.query_def} --> Unexpected STOP codon at 3' end. Distance to subject end: #{sq_end_distance.abs} aas"
				# end
			else
				warnings += "Distance to subject end: #{sq_end_distance.abs} aas, "
				# if (@verbose)
				# 	puts "#{db_name} -- #{q.query_def} --> Distance to subject end: #{sq_end_distance.abs} aas"
				# end
			end
			
		else # tenemos suficiente secuencia
			if (putative_end) # tenemos un stop
				q_stop_resto = (putative_end - 15) # distancia entre el stop y el q_end, si es negativo el stop esta antes del q_end
				qs_stop_distance = q_stop_resto - s_end_resto # distancia entre los stops del q y el s
				
				# puts "putative_end: #{putative_end}, q_stop_resto: #{q_stop_resto}, qs_stop_distance: #{qs_stop_distance}"
				
				if (qs_stop_distance + aas_n_end >= 0) # si q_end esta a menos de 15 aas antes o esta despues del s_end; complete
					end_status = 'complete'
				elsif (qs_stop_distance + 2*aas_n_end < 0) # si q_end es mas de 30 aas menor que el s_end; putative/Putative chimeric seq
					end_status = 'putative'
					warnings += " query STOP codon too far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas, putative chimeric sequence, "
					# if (@verbose)
					# 	puts "#{db_name} -- #{q.query_def} --> query STOP too far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas, putative chimeric sequence"
					# end
				elsif (qs_stop_distance + aas_n_end < 0) # si q_end es mas de 15 aas menor pero menos de 30 que el s_end; putative
					end_status = 'putative'
					warnings += " query STOP codon is far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas, "
					# if (@verbose)
					# 	puts "#{db_name} -- #{q.query_def} --> query STOP far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas"
					# end
				end
				end_substring = end_substring[0, putative_end+1]
				
			else # no tenemos codon de parada pero tenemos suficiente secuencia
				end_status = 'putative'
				warnings += " STOP codon was not found. Distance to subject end: #{sq_end_distance.abs} aas, "
				# if (@verbose)
				# 	puts "#{db_name} -- #{q.query_def} --> STOP codon was not found. Distance to subject end: #{sq_end_distance.abs} aas"
				# end
			end
			
		end
		
		return [resto_substring, end_substring, end_status, warnings, putative_end]
	end


	def determine_status(atg_status,end_status)
		
		if (atg_status == 'complete') && (end_status == 'complete') # proteina completa
			final_status = 'Complete'
		elsif (atg_status == 'putative' && end_status == 'complete') || (atg_status == 'complete' && end_status == 'putative') || (atg_status == 'putative' && end_status == 'putative') # comienzo y/o final putative
			final_status = 'Putative Complete'
		elsif (atg_status == 'incomplete') && (end_status == 'incomplete') # region intermedia
			final_status = 'Internal'
		elsif (atg_status == 'complete') && (end_status == 'incomplete') # tenemos el principio de la proteina
			final_status = 'N-terminus'
		elsif (atg_status == 'putative') && (end_status == 'incomplete') # puede que tengamos el principio de la proteina
			final_status = 'Putative N-terminus'
		elsif (atg_status == 'incomplete') && (end_status == 'complete') # tenemos el final de la proteina
			final_status = 'C-terminus'
		elsif (atg_status == 'incomplete') && (end_status == 'putative') # puede que tengamos el final de la proteina
			final_status = 'Putative C-terminus'
		end
		
		return final_status
	end


	def print_annotations(seq, q, final_hit, final_status, final_prot, warnings, query_fasta, db_name)
		name_diff = q.query_def.length - final_hit.acc.length
		if (name_diff > 0)
			spnum = ' '*name_diff.to_i
		else
			spnum = ''
		end
#-------------------------------------------------------------------------------------------------------------------------------------
		# if the sequence is Complete will be printed                 --------------------------------------------------------------------
		if (final_status == 'Complete') 
			seq.annotate(:protein,">#{q.query_def}\n#{final_prot}")
			print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)

			if (final_hit.reversed) 
				(kk, final_hit.q_frame, final_hit.q_end, final_hit.q_beg) = reverse_seq(seq.seq_fasta, final_hit.q_frame.to_i, final_hit.q_beg.to_i, final_hit.q_end.to_i)
			end
			seq.annotate(:complete,"#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}")
			seq.annotate(:alignment,"#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n")
#-------------------------------------------------------------------------------------------------------------------------------------
		else # la proteina no esta completa                      -------------------------------------------------------------------------
			if (!seq.get_annotations(:tmp_annotation).empty?) && (!seq.get_annotations(:tmp_annotation).nil?) # ---> trae informacion de una bd anterior
				if (db_name =~/^tr_/) #                                          --->  estamos usando el trembl, se dejan las anotaciones que trae
					# puts "#{db_name} -- #{q.query_def} --> print_annotations: sequence not complete! recovering annotations from previous database!"
					(kk1, final_hit, final_prot, query_fasta, final_status) = seq.get_annotations(:tmp_annotation).first[:message][3]
					print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)

					(name,fasta_length,acc,db_name,final_status,testcode,e_val,ident,my_length,subject_length,warnings,q_frame,q_beg,q_end,s_beg,s_end,description,final_prot) = seq.get_annotations(:tmp_annotation).first[:message][0].split("\t")
					if (final_hit.reversed) 
						(kk, q_frame, q_end, q_beg) = reverse_seq(query_fasta, q_frame.to_i, q_beg.to_i, q_end.to_i)
					end
					
					my_prot = seq.get_annotations(:tmp_annotation).first[:message][1]
					seq.annotate(:protein,my_prot)
					my_align = seq.get_annotations(:tmp_annotation).first[:message][2]
					seq.annotate(:alignment,my_align)
					
					tmp_annot = "#{name}\t#{query_fasta.length}\t#{acc}\t#{db_name}\t#{final_status}\t\t#{e_val}\t#{ident}\t#{my_length}\t#{subject_length}\t#{warnings}\t#{q_frame}\t#{q_beg}\t#{q_end}\t#{s_beg}\t#{s_end}\t#{description}\t#{final_prot}"
					seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
				#-----------------------------------------------------------------------------------------------------------------------------
				# elsif (db_name =~ /^sp_/) #                                       ---> estamos usando el sp, se dejan las anotaciones que trae
					
					# puts "#{db_name} -- #{q.query_def} --> print_annotations: Mantenemos las anotaciones de la BD de usuario y pasamos la secuencia al trembl"
				end
#-------------------------------------------------------------------------------------------------------------------------------------
			elsif (seq.get_annotations(:tmp_annotation).empty?) #                                ---> NO trae informacion de una bd anterior
				if (db_name =~ /^tr_/) #                                                                         ---> estamos usando el trembl
					# puts "#{db_name} -- #{q.query_def} --> print_annotations: #{q.query_def} is not complete!! se anota con trembl"
					print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)

					if (final_hit.reversed) 
						(kk, final_hit.q_frame, final_hit.q_end, final_hit.q_beg) = reverse_seq(seq.seq_fasta, final_hit.q_frame.to_i, final_hit.q_beg.to_i, final_hit.q_end.to_i)
					end

					seq.annotate(:alignment,"#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n")
					seq.annotate(:protein,">#{q.query_def}\n#{final_prot}")
					tmp_annot = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}"
					seq.annotate(:tmp_annotation,[tmp_annot, '','',''])
#-------------------------------------------------------------------------------------------------------------------------------------
				else #                                                                               cargamos anotaciones para la siguiente BD
					tmp_prot = ">#{q.query_def}\n#{final_prot}"
					tmp_align = "#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n"
					tmp_annot = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}"
					seq.sec_desc = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\tCoding Seq\t\t#{final_hit.e_val}\t#{final_hit.ident}\t\t#{final_hit.full_subject_length}\t#{warnings}\t\t\t\t\t\t#{final_hit.definition}\t"
					seq.annotate(:tmp_annotation,[tmp_annot, tmp_prot,tmp_align,[q, final_hit, final_prot, query_fasta, final_status]])
					
					# puts "\n\n\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.---#{q.query_def}\t#{final_status}\n#{tmp_prot}"
					# puts "#{db_name} -- #{q.query_def} --> print_annotations: cargamos anotaciones para utilizarlas en la siguiente BD"
				end
			end
		end
	end


	def print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status)
		
		bad_atg = false
#-------------------------------------------------------------------------------------------------------------  ATG

		if (final_status == 'Complete') || (final_status == 'Putative Complete') || (final_status == 'Putative N-terminus') || (final_status == 'N-terminus')
			# puts "entra aqui, final_status: #{final_status}"
			my_seq_n = query_fasta[final_hit.q_beg - 5..final_hit.q_beg + 5]

			beg5 = false
			# -------------------------------------   si my_seq_n = nil puede ser porque q_beg sea < 5
			if (final_hit.q_beg < 6)
				my_seq_n = query_fasta[0..10]
				beg5 = true
				# puts "empieza en el borde de la seq"
			end

			atg_found = my_seq_n.index(/ATG/i)
			atg_found_rv = my_seq_n.rindex(/ATG/i)
			my_atg_index = nil
		end

		if (!atg_found.nil?)
			if (beg5)

				my_seq_n.sub!(/ATG/i,'_-_ATG')
				my_atg_index = atg_found
				my_seq = my_seq_n + query_fasta[11..query_fasta.length + 1]

			elsif (atg_found == atg_found_rv)

				my_seq_n.sub!(/ATG/i,'_-_ATG')
				my_atg_index = final_hit.q_beg - 5 + atg_found
				
				my_seq = query_fasta[0..final_hit.q_beg - 6] + my_seq_n + query_fasta[final_hit.q_beg + 6..query_fasta.length + 1]
				
				# puts "my_seq despues de encontrar el atg: #{my_seq}"
			elsif (atg_found == 5) || (atg_found_rv == 5)

				my_seq_n = my_seq_n[0..4]+'_-_'+my_seq_n[5..10]
				my_atg_index = final_hit.q_beg - 5 + atg_found
				my_seq = query_fasta[0..final_hit.q_beg - 6] + my_seq_n + query_fasta[final_hit.q_beg + 6..query_fasta.length + 1]

			else

				# puts "#{q.query_def}  tiene mas de un ATG  my_seq_n: #{my_seq_n}"
				bad_atg = true
				my_seq = query_fasta
			end

		else

			bad_atg = true
			# puts "#{q.query_def}  NO TIENE ATG  my_seq_n: #{my_seq_n}"
			my_seq = query_fasta

		end
#-------------------------------------------------------------------------------------------------------------  STOP
	stop_c = nil
		if (final_status == 'Complete') || (final_status == 'Putative Complete') || (final_status == 'C-terminus') || (final_status == 'Putative C-terminus')

			if (bad_atg == true)
				stop_c = my_seq[final_hit.q_end - 2..final_hit.q_end]
				stop_c_longer = my_seq[final_hit.q_end - 7..final_hit.q_end + 5]
			else
				stop_c = my_seq[final_hit.q_end + 3..final_hit.q_end + 5]
				stop_c_longer = my_seq[final_hit.q_end - 2..final_hit.q_end + 10]
			end

		end

		if (!stop_c.nil?)
			# puts stop_c
			# puts stop_c_longer
			if (stop_c.translate == '*')

				if (bad_atg == true)
					my_seq = my_seq[0..final_hit.q_end] +'___'+ my_seq[final_hit.q_end + 1..my_seq.length + 1]
					seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG\t\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
				else

					my_seq = my_seq[0..final_hit.q_end + 5] +'___'+ my_seq[final_hit.q_end + 6..my_seq.length + 1]
					my_prot = my_seq.sub(/\w+_\-_/,'')
					my_prot = my_prot.sub(/___\w+/,'')
					my_prot = my_prot.translate
					my_prot = my_prot.sub(/x$/,'')

					simliar_fragment = final_prot.lcs(my_prot)

					if (simliar_fragment.length == final_prot.length) && (simliar_fragment.length == my_prot.length)
						seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\t\t\t\t\t\t#{my_seq}")
					else
						seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tthe nucleotide sequence contain a lot of errors\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
						# puts "nt seq: was no possible to find stop codon, the nucleotide sequence contain a lot of errors"
					end

				end

			else
				if (final_status == 'Putative Complete') || (final_status == 'C-terminus') || (final_status == 'Putative C-terminus')

					if (bad_atg == true)
						stop_c = my_seq[final_hit.q_end+1..final_hit.q_end+3]
						stop_c_longer = my_seq[final_hit.q_end - 4..final_hit.q_end + 8]
					else
						stop_c = my_seq[final_hit.q_end + 7..final_hit.q_end + 9]
						stop_c_longer = my_seq[final_hit.q_end..final_hit.q_end + 13]
					end
					
					if (!stop_c.nil?)
						if (stop_c.translate == '*')
							final_hit.q_end = final_hit.q_end + 3
							if (bad_atg == true)
								my_seq = my_seq[0..final_hit.q_end] +'___'+ my_seq[final_hit.q_end + 1..my_seq.length + 1]
								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG\t\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
							else
								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
							end
						else
							if (bad_atg == true)
								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG NO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
								# puts "find nt end: NO ATG, NO exact STOP"
							else
								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
								# puts "find nt end: GOOD ATG, NO exact STOP"
							end
						end
					end
				end
				

			end

		else

			if (bad_atg == true)
				seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG NO STOP\t\t\t\t\t#{my_seq}")
			else
				seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP\t\t\t\t\t#{my_seq}")
			end

		end
		
	end

end