# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# 'Software'), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

require 'scbi_blast'
require 'fl_string_utils.rb'

OPERATION = 0
QUERY = 1
TARGET = 2

class ExoBlastHit < BlastHit
	attr_accessor :q_frameshift, :s_frameshift
	def initialize(start_target, ends_target, start_query, ends_query)
		super(start_target, ends_target, start_query, ends_query)
		@s_frameshift=[]
		@q_frameshift=[]
	end
end

# Extracts results from blast table's file and uses it to create instances of "BlastQuery" and "BlastHit"
class ExonerateResult

  # Parser initialization
  def initialize(input, seqs= nil, query_seqs = nil, all = true)
    @querys = []
    @seqs = seqs #unigenes
    @prot_seqs = query_seqs#prot

    if input.is_a?(Array)
    	input.each do |file|
	      	parse_file(File.open(file).readlines, all)
     	end
    else
      parse_file(File.open(input).readlines, all)
    end
    query_name=''
  end

  	def parse_file(lines, all)
	  	lines_parsed=[] 
	    lines_parsed={} if !all
	    lines.each do |line|
			if line=~ /^vulgar:/
				line.chomp!
				fields=line.split(' ', 11)
				features={'query_id'=> fields[1], 'query_start_align'=> fields[2].to_i, 'query_end_align'=> fields[3].to_i, 'query_strand'=> fields[4],'target_id'=> fields[5], 'target_start_align'=> fields[6].to_i, 'target_end_align'=> fields[7].to_i, 'target_strand'=> fields[8], 'score'=> fields[9].to_i, 'align_data'=> fields[10]}
				if all
					lines_parsed << features
				else
				  if !lines_parsed.key?(features['target_id']) # Añadir valor si no existe
				    lines_parsed[features['target_id']]=features
				  else
				    if features['score']>lines_parsed[features['target_id']]['score'] # Si ya existe una query, ver si la nueva presenta un mayor score y reemplazar la antigua
				    	lines_parsed[features['target_id']]=features
				    end
				  end
				end
			end
		end		
		convert_parsed_lines(lines_parsed)	
  	end
  
	def convert_parsed_lines(lines_parsed)
		last_query = nil
		query = nil
		lines_parsed.each_with_index do |line|
			begin
				if lines_parsed.class.to_s=='Array'
					align_data=line['align_data']
					features=line
				else #hash
					align_data=line[1]['align_data']
					features=line[1]        
				end
				tags = align_data.scan(/([MFG53S]) ([0-9]+) ([0-9]+)/)
				tags.map!{|tag| [tag[0], tag[1].to_i, tag[2].to_i]}
				if features['target_id'] != last_query
					last_query = features['target_id']
					query = BlastQuery.new(features['target_id'])
					@querys << query
				end
			  	hiting(features,tags, query)
			rescue
				puts "Result: #{features['target_id']} => #{features['query_id']} hasn't been parsed\n#{line}"
			end
		end    
	end
   
   #this method only works fine with --model protein2dna parameter of exonerate
 	def hiting(features, tags, query) #Convierte las coordenadas relativas del exonerate a absolutas tipo blast, definiendo solo los hits		
		do_align = false
		do_align = true if !@prot_seqs.nil? && !@seqs.nil?
		start_target = features['target_start_align']#Unigen
		start_query = features['query_start_align'] #proteina
		ends_target = features['target_end_align']
		ends_query = features['query_end_align']-1 # -> Exonerate don't set to 0 position the ends of target and query
		if features['target_strand'] == '-' #-> Exonerate don't set to 0 position the ends of target and query
			start_target -= 1 # Start target is end target when mathc is in reversed complementary strand
		else
			ends_target -= 1
		end
		hit = ExoBlastHit.new(start_target+1, ends_target+1, start_query+1, ends_query+1)
		define_hit_parameters(hit, features, tags)
		query.add_hit(hit)
		
		#Define alignment and blast like parameters
		target_alignment = ''
		query_alignment = ''
		counter_target = start_target
		counter_query = start_query
		if do_align #get seqs
			query_seq = @prot_seqs[features['query_id']]
			target_seq = @seqs[features['target_id']]
		end
		counter_target, target_seq = do_reverso_complementary(counter_target, target_seq) if features['target_strand'] == '-'
		query_frameshift = []
		target_frameshift = []
		gap_shift = 0
		#puts features['query_id']+ ' ' +features['target_strand'], '-----------------------'
		tags.each_with_index do |tag, n_operation|
			#puts tag.inspect
			if do_align
				gap_shift = 0 if tag[OPERATION] != 'G'
				query_alignment << query_seq[counter_query, tag[QUERY]]
				target_alignment << target_seq[counter_target, tag[TARGET]].translate
			end
			if tag[OPERATION] == 'F'
				if tag[TARGET] > 0 && tag[TARGET] < 3 #true FRAMESHIFT
					gap_shift += 1
					if tags[n_operation+1][OPERATION] != 'G' #there are frameshift that not insert a gap, we do it
						query_alignment <<  '-' if do_align
					end
				else
					query_alignment <<  '-' * (tag[TARGET]/3.0).ceil if do_align
				end
				query_frameshift << counter_query
				fs_counter_target = counter_target
				fs_counter_target = target_seq.length - counter_target if features['target_strand'] == '-' # ESto es un apaño, habria que plantear el parseo de las reversas como reduccion en el contador del formato del exonerate, en vez de como adiccion
				if tag[TARGET] > 3
					real_fs = tag[TARGET]%3
					real_gap = tag[TARGET] - real_fs
					fs = [fs_counter_target + real_gap, real_fs]
				else
					fs = [fs_counter_target, tag[TARGET]]
				end
				target_frameshift << fs
			elsif tag[OPERATION] == 'G'
				query_alignment <<  '-' * (tag[TARGET]/3.0).ceil if do_align
				diff = tag[QUERY] - gap_shift
				target_alignment << '-' * diff if do_align && diff > 0
				gap_shift = 0
			end	
			counter_query += tag[QUERY]
			counter_target += tag[TARGET]
		end
		hit.s_frameshift = query_frameshift
		hit.q_frameshift = target_frameshift

		#puts "\e[33m#{target_alignment}\e[0m", "\e[36m#{query_alignment}\e[0m"
		if do_align
			hit.q_seq = target_alignment
			hit.s_seq = query_alignment
			hit.align_len = query_alignment.length
			hit.ident = set_ident(target_alignment,query_alignment)
		end
	end #def

	def do_reverso_complementary(counter_target, target_seq)
		counter_target = target_seq.length - 1 - counter_target 
		target_seq = target_seq.complementary_dna
		return counter_target, target_seq
	end

	def set_ident(target_alignment, query_alignment)
		matchs = 0
		position = 0
		target_alignment.each_char do |char|
			matchs +=1 if char == query_alignment[position]
			position +=1
		end
		perc_ident = ('%.2f' % (matchs*100.0/target_alignment.length)).to_f
		return perc_ident
	end

	def define_hit_parameters(hit, features, tags)
		hit.gaps = 0
		tags.map{|aln| hit.gaps += 1 if aln[0] == 'G'}
		hit.reversed = false
		hit.align_len =(features['query_end_align'] - features['query_start_align']).abs+1 
		hit.mismatches=0
		hit.e_val=0
		hit.bit_score=0
		hit.score = features['score']
		hit.s_frame = nil
		strand = 1
		strand = -1 if features['target_strand'] == '-'
		hit.q_frame = (((features['target_start_align']) % 3) +1) *strand
		hit.subject_id = features['query_id']
		hit.full_subject_length=0
		hit.definition=''
		hit.acc=features['query_id']
		hit.q_seq=''
		hit.s_seq=''
	end

  # inspect results
  def inspect
    res = "Exonerate results:\n"
    res+= '-'*20
    res+= "\nQuerys: #{@querys.count}\n"
    @querys.each{|q| res+=q.inspect+"\n"}
    return res
  end

  # find query by name
  def find_query(querys,name_q)
    #  newq = querys.find{|q| ( q.find{|h| (h.subject_id)})}
    new_q=nil

    if !querys.empty?
      new_q=querys.find{|q| (q.query_id==name_q)}
    end

    return new_q
  end

  # check if there are querys
  def empty?

    return @querys.empty?
  end

  # get query count
  def size
    @querys.size
  end

  attr_accessor :querys
end