require 'orf'
require 'types'
require 'warnings'
require 'common_functions'

class Sequence
	
	attr_accessor :seq_name, :seq_fasta, :fasta_length, :db_name, :seq_nt, :seq_aa, :db, :type, :status, :id, :orfs, :area_without_annotation, :save_fasta, :ignore, :hit, :t_code, :functional_annotations, :fpkm, :coverage_analysis

	def initialize(seq_name, seq_fasta, seq_qual='')
		@seq_name = seq_name
		@seq_fasta = seq_fasta
		@fasta_length = seq_fasta.length
		@db_name = nil
		@seq_nt = nil # Unigen sequence with tagged ATG & stop
		@seq_aa = nil # Protein sequence generated over unigen
		@db =nil
		@type = UNKNOWN # See types.rb
		@status = FALSE # TRUE => Sure, FALSE => Putative
		@id = nil #Prot or EST id, can be several => array
		@warnings = []
		@annotations=[]
		@functional_annotations = {}
		@orfs=[]

		#Mapping_info
		@fpkm = []
		@coverage_analysis = []
		
		@area_without_annotation=FALSE
		@save_fasta=TRUE
		@ignore=FALSE 
		@hit=nil
		@t_code=0
	end

	
	def add_orf(orf_seq, orf_t_start, orf_t_end, orf_q_frame, orf_stop_codon, orf_type)
		orf = Orf.new(orf_seq, orf_t_start, orf_t_end, orf_q_frame, orf_stop_codon, orf_type)
		@orfs << orf
	end
		

	def change_degenerated_nt!	
		translate_hash = {}
		translate_hash['R']= [['a','g'],0]
		translate_hash['W']= [['a','t'],0]
		translate_hash['M']= [['a','c'],0]
		translate_hash['K']= [['g','t'],0]
		translate_hash['S']= [['g','c'],0]
		translate_hash['Y']= [['c','t'],0]
		translate_hash['H']= [['a','t','c'],0]
		translate_hash['B']= [['g','t','c'],0]
		translate_hash['D']= [['g','a','t'],0]
		translate_hash['V']= [['g','a','c'],0]
		translate_hash['N']= [['g','a','c','t'],0]


		fix_degenerated_fasta!(translate_hash)		
	end

	
	def fix_degenerated_fasta!(translate_hash)
		s = @seq_fasta
		res = []

		nts_of_a_line = s.split('')

		nts_of_a_line.map{|e|

			if (e =~ /[RWMKSYHBDVN]/)
				translate_hash[e][1] += 1
				e = translate_hash[e][0][translate_hash[e][1]%translate_hash[e][0].length]
			end

			res << e

		}

		@seq_fasta=res.compact.join
	end
  
	def clean_orfs
		@orfs=[]
	end

	def reset_classification
		@type = UNKNOWN
		@status = FALSE
	end

	def clean_warnings
		@warnings = []
	end

	def clean_annotations
		@annotations = []
	end

	def get_acc
		acc=hit.acc
		return acc
	end

	def get_pident
		pident=hit.ident
		return pident
	end

	def format_chimera!
		@hit = []
	end
	
	def warnings(warn)
		if warn.class.to_s == 'Array'
			warn.each do |w|
				@warnings << check_warn(w)
			end
		else
			@warnings << check_warn(warn)
		end
	end

	def clone_warnings(array_warnings)
		array_warnings.map{|warn| @warnings << warn.dup}
	end

	def check_warn(warn)
		check = warn
		replace = nil
		if warn.class.to_s == 'Array'
			check = warn.shift # Take warning tag message
			replace = warn # Take values to replace in message
		end

		message = $warnings_hash[check]
		if message.nil?
			message = check # If not exists the message
		end

		if !replace.nil?
			message = message.dup # Duplicate memory to avoid overwrite original warning hash messages
			replace.each do |rep|
				message.sub!('(*replace*)',"#{rep}") #message variable
			end
		end
		return message
	end

	def test_code(test_code)
		@t_code = test_code
		if @t_code >= 0.95
			@status = TRUE
		end
	end

	def get_fasta(seq)
		fasta = ">#{@seq_name}\n#{seq}"
		return fasta
	end

	def write_info(output_files) # Output_files is a hash
		if @save_fasta
			output_files['seqs'].puts get_fasta(@seq_fasta)	
		end
		if !@fpkm.empty?
			output_files['fpkm'].puts "#{@seq_name}\t#{@fpkm.join("\t")}"
		end
		if !@coverage_analysis.empty?
			all_coverages = @coverage_analysis.last
			output_files['coverage'].puts "#{@seq_name}\t#{@coverage_analysis[0..@coverage_analysis.length-1].join("\t")}"
		end

		case @type
		when OTHER
			write_other(output_files[@type])
		when CHIMERA
			write_chimera(output_files[@type])
		when UNMAPPED
			write_unmapped(output_files[@type])
		when MISASSEMBLED
			write_misassembled(output_files[@type])
		when UNKNOWN
			write_unknown(output_files[@type])
		when COMPLETE .. INTERNAL
			write_prot_annot(output_files['db'])
			write_prot_seq(output_files['prot'])
			write_nt_seq(output_files['nts'])
			write_align(output_files['align'])
		when NCRNA
			write_ncrna(output_files[@type])
		when CODING
			write_coding(output_files[@type])
		else
			if @type != FAILED
				raise "#{@type} is an incorrect type"
			end
		end
	end

	def all_warns
		all = @warnings.join(' ')
		return all
	end

	def write_other(file)
		file.puts "#{@seq_name}\t#{@fasta_length}\t#{@hit.acc}\t#{@db_name}\t#{all_warns}"
	end

	def write_unmapped(file)
		file.puts "#{@seq_name}\t#{@fasta_length}"
	end

	def write_chimera(file) #TODO : write 'SOLVED' tag
		@hit.each do |h|
			file.puts "#{@seq_name}\t#{@fasta_length}\t#{h.acc}\t#{@db_name}\t#{h.q_frame}\t#{h.e_val}\t#{h.ident}\t#{h.q_beg + 1}\t#{h.q_end + 1}\t#{h.s_beg + 1}\t#{h.s_end + 1}\t#{h.definition}"
		end
		file.puts
	end

	def write_misassembled(file)
		file.puts "#{@seq_name}\t#{@fasta_length}\t#{@hit.acc}\t#{@db_name}"
	end

	def write_unknown(file)
		# ħit is an array. 2 => q_frame, 1 ORF end, 0 ORF beg
		if hit.class.to_s == 'Array'
			orf_beg = @hit[0]
			orf_end = @hit[1]
			q_frame = @hit[2]		
		else 
			orf_beg = '-'
			orf_end = '-'
			q_frame = '-'
		end
		file.puts "#{@seq_name}\t#{@fasta_length}\t#{@t_code}\t#{all_warns}\t#{q_frame}\t#{orf_beg}\t#{orf_end}"
	end

	def write_prot_annot(file)
		final_func_annot = Array.new(9, '-')
		if !@functional_annotations.empty?
			final_func_annot = @functional_annotations.values
		end
		file.puts "#{@seq_name}\t#{@fasta_length}\t#{@hit.acc}\t#{@db_name}\t#{prot_annot_calification}\t#{@hit.e_val}\t#{@hit.ident}\t#{@hit.full_subject_length}\t#{@seq_aa.length}\t#{all_warns}\t#{@hit.q_frame}\t#{@hit.q_beg + 1}\t#{@hit.q_end + 1}\t#{@hit.s_beg + 1}\t#{@hit.s_end + 1}\t#{@hit.definition}\t#{final_func_annot.join("\t")}"
	end

	def write_ncrna(file)
		file.puts "#{@seq_name}\t#{@fasta_length}\t#{@hit.acc}\t#{@hit.e_val}\t#{@hit.ident}\t#{@hit.q_beg + 1}\t#{@hit.q_end + 1}\t#{@hit.s_beg + 1}\t#{@hit.s_end + 1}\t#{@hit.definition}"
	end

	def write_coding(file)
		# ħit is an array. 2 => q_frame, 1 ORF end, 0 ORF beg 
		calification = 'Putative'
		if @status
			calification = 'Sure'
		end
		file.puts "#{@seq_name}\t#{@fasta_length}\t#{calification}\t#{@t_code}\t#{@hit.last}\t#{@hit.first}\t#{@hit[1]}"
	end

	#Write complementary files
	def write_prot_seq(file)
		file.puts get_fasta(@seq_aa)
	end

	def write_align(file)
		tabs = (seq_name.length/8).ceil
		if tabs == 0
			tabs = 1
		end
		second_tab = 0
		if seq_name.length > 7
			second_tab = 1
		end
		file.puts "#{@seq_name}#{"\t"*tabs}#{@hit.q_seq}\n#{@hit.acc}#{"\t"*(tabs+second_tab)}#{@hit.s_seq}"
		file.puts
	end

	def write_nt_seq(file)
		file.puts "#{@seq_name}\t#{@fasta_length}\t#{@seq_nt}"
	end

	def calification
		type_description = nil
		case @type
			when FAILED
				type_description = 'Failed'
			when OTHER
				type_description = 'Other'
			when UNMAPPED
				type_description = 'Unmapped'
			when CHIMERA
				type_description = 'Chimera'
			when MISASSEMBLED
				type_description = 'Misassembled'
			when UNKNOWN
				type_description = 'Unknown'
			when COMPLETE
				type_description = 'Complete'
			when N_TERMINAL 
				type_description = 'N_terminal'
			when C_TERMINAL
				type_description = 'C_terminal'
			when INTERNAL
				type_description = 'Internal'
			when CODING
				type_description = 'Coding'
			when NCRNA
				type_description = 'NcRNA'
		end

	end

	def prot_annot_calification
		info = "#{calification} "	
		if @status
			info << 'Sure'
		else
			info << 'Putative'
		end
		return info
	end

	def show_alignment(h, nts, show_nts, original_query_coordinates = nil)
		puts "Prot id:\t#{h.acc}", "Alignment length:\t#{h.align_len} aa", "Subject   length:\t#{h.s_len} aa", "Query length:\t#{nts.length/3} aa"
		puts prot_annot_calification
		puts

		aa_unigen = nts[h.q_frame - 1 .. nts.length-1].translate
		index = contenidos_en_prot(h.q_seq, aa_unigen)

		# View desplacements 5-prime/align/3-prime
		subzone_align = nil
		if !original_query_coordinates.nil?
			subzone_align = {}
			if h.q_beg > original_query_coordinates.first #alignment has transferred characters to 5 prime
				subzone_align['beg'] = [original_query_coordinates.first, h.q_beg-3, 42] # -3 to exclude the last aa
			elsif h.q_beg < original_query_coordinates.first
				subzone_align['beg'] = [h.q_beg, original_query_coordinates.first-3, 46] #alignment has received characters from 5 prime
			end

			if h.q_end < original_query_coordinates.last #alignment has transferred characters to 3 prime
				subzone_align['end'] = [h.q_end, original_query_coordinates.last, 42]
			elsif h.q_end > original_query_coordinates.last
				subzone_align['end'] = [original_query_coordinates.last, h.q_end, 43] #alignment has received characters from 3 prime
			end
		end

		# Print 5 prime
		if index > 0 # 5 prime  exists
			aa_align = aa_unigen[0 .. index-1].split('')
			nt_align = nts[h.q_frame-1..h.q_beg-1]
			print_alignment(aa_align, nt_align, 36, show_nts, subzone_align)
			reduce_coordinates(subzone_align, aa_align, h)
		end

		# Print core alignment or protein
		aa_align = h.q_seq.split('')
		nt_align = nts[h.q_beg..h.q_end]
		print_alignment(aa_align, nt_align, 32, show_nts, subzone_align)
		reduce_coordinates(subzone_align, aa_align, h)

		# Print 3 prime
		gaps = h.q_seq.count('-')
		three_prime_beg = index+h.q_seq.length-gaps
		if aa_unigen.length > three_prime_beg # 3 prime  exists
			aa_align = aa_unigen[three_prime_beg .. aa_unigen.length-1].split('')
			fs = check_frame_shift(h)
			nt_align = nts[h.q_end+1-fs..nts.length-1]
			print_alignment(aa_align, nt_align, 33, show_nts, subzone_align)
		end

	end


	def print_alignment(aa_align, nt_align, color, show_nts, mark_subzone = nil)
		original_color = color
		c={	'GCT'=>'A','GCC'=>'A','GCA'=>'A','GCG'=>'A',
			'CGT'=>'R','CGC'=>'R','CGA'=>'R','CGG'=>'R','AGA'=>'R','AGG'=>'R',
			'AAT'=>'N','AAC'=>'N',
			'GAT'=>'D','GAC'=>'D',
			'TGT'=>'C','TGC'=>'C',
			'CAA'=>'Q','CAG'=>'Q',
			'GAA'=>'E','GAG'=>'E',
			'GGT'=>'G','GGC'=>'G','GGA'=>'G','GGG'=>'G',
			'CAT'=>'H','CAC'=>'H',
			'ATT'=>'I','ATC'=>'I','ATA'=>'I',
			'TTA'=>'L','TTG'=>'L','CTT'=>'L','CTC'=>'L','CTA'=>'L','CTG'=>'L',
			'ATG'=>'M',
			'AAA'=>'K','AAG'=>'K',
			'TTT'=>'F','TTC'=>'F',
			'CCT'=>'P','CCC'=>'P','CCA'=>'P','CCG'=>'P',
			'TCT'=>'S','TCC'=>'S','TCA'=>'S','TCG'=>'S','AGT'=>'S','AGC'=>'S',
			'ACT'=>'T','ACC'=>'T','ACA'=>'T','ACG'=>'T',
			'TGG'=>'W',
			'TAT'=>'Y','TAC'=>'Y',
			'GTT'=>'V','GTC'=>'V','GTA'=>'V','GTG'=>'V',
			'TAG'=>'*','TGA'=>'*','TAA'=>'*'}
		
		nt_line = ''
		aa_line = ''
		gaps = 0
		count = 0
		aa_align.each_with_index do |aa, n|
			if aa == '-'
				nt_line << '---'
				gaps += 1
			else
				# Check aa with codon
				codon_window = (n-gaps)*3
				codon = nt_align[codon_window..codon_window+2]
				nt_line << "#{codon}"
				if aa.upcase != 'X' 
					if codon.upcase.include?('N')
						traslated_aa = '-'
					else	
						traslated_aa = c[codon]
					end
					if traslated_aa != '-' && traslated_aa != aa
						puts "#{traslated_aa} #{aa}"
						aa = '?'
					end
				end
			end
			if !mark_subzone.nil?
				nts_coordenate = (n-gaps)*3
				mark_subzone.values.each do |subzone|
					if nts_coordenate >= subzone[0] && nts_coordenate <= subzone[1] #0 => first coordenate, 1 => second coordenate
						color = subzone.last
					end
				end
			end
			space = nil
			if show_nts
				space = ' '
			end
			aa_line << "\e[#{color}m#{space}#{aa}#{space}\e[0m"
			color = original_color
			line_length = 60
			if (n+1) % line_length == 0 || n+1 == aa_align.length
				count = n + 1
				print "#{count}\t"
				puts aa_line
				if show_nts
					print "#{count*3}\t"
					puts nt_line
				end
				aa_line = ''
				nt_line = ''
			end
		end

	end

	def reduce_coordinates(subzone_align, aa_align, h)
		if !subzone_align.nil?
			aligned = 3 * aa_align.length + h.q_frame-1
			subzone_align.values.each do |subzone|
				subzone[0]-= aligned 
				subzone[1]-= aligned 
			end
		end
	end

	def area_without_annotation?
		if @hit.class == Array
			hit = @hit.first
		else
			hit = @hit
		end
		upstream_annotation_space = hit.q_beg 
		downstream_annotation_space = @fasta_length - hit.q_end
		if upstream_annotation_space >= 150 || downstream_annotation_space >= 150
			@area_without_annotation = TRUE
		end
		return @area_without_annotation
	end

	def clone
		new_seq = self.dup
		new_seq.clean_annotations
		new_seq.clean_warnings
		new_seq.clean_orfs
		new_seq.clone_warnings(@warnings)
		new_seq.clone_annotations(@annotations)
		return new_seq
	end

	def clone_annotations(array_annotations)
		array_annotations.map{|annotation| @annotations << annotation.dup}
	end

	def unmapped?
		res = FALSE
		res = TRUE if !@coverage_analysis.empty? && @coverage_analysis[3] == 0 #3 => percentage of sequence covered by reads 
		return res
	end
end