require 'common_functions'
include CommonFunctions

# Joins the HSPs of a hit that share the accession of the first HSP into a
# single reference hit (+final_hit+), patching the query nucleotide sequence
# (+output_seq+) with 'n' and the aligned amino-acid sequences with 'x' where
# the HSPs overlap or are separated on the query.
#
# Readers:
# * +output_seq+ - query sequence after the patches applied while merging.
# * +final_hit+  - first HSP (by query start) extended with the merged ones;
#                  nil when no HSP was given.
# * +msgs+       - one [label, final_hit.q_end + 1, hit.q_beg + 1] entry per
#                  merged HSP.
# * +number_x+, +is_ok+, +q_index_start+, +full_prot+ - not assigned anywhere
#   in this class body, so they read as nil unless set elsewhere.
class UneLosHit
  attr_reader :output_seq, :final_hit, :msgs, :number_x, :is_ok, :q_index_start, :full_prot

  # full_hit::    enumerable of HSP objects (must respond to acc, q_frame,
  #               q_beg, q_end, s_beg, s_end, ... and their writers).
  # query_fasta:: query nucleotide sequence.
  def initialize(full_hit, query_fasta)
    mismas_ids_array, query_fasta = hits_misma_id(full_hit, query_fasta)
    @mismas_ids_array = mismas_ids_array
    @msgs = []
    @output_seq = query_fasta

    # Sort the HSPs by their start on the query; a single HSP needs no sorting.
    mismas_ids_array.sort! { |h1, h2| h1.q_beg <=> h2.q_beg } if mismas_ids_array.count > 1

    # The first HSP is taken as the reference for the unigene reconstruction.
    @final_hit = mismas_ids_array.shift

    # HSPs that neither overlap nor lie after the reference are left unmerged.
    mismas_ids_array.each do |hit|
      if overlapping_hits?(hit)
        label = @msgs.empty? ? 'OverlapHit' : 'AndOverlapHit'
        @msgs << [label, @final_hit.q_end + 1, hit.q_beg + 1]
        overlapped_hits_query(hit)
      elsif separated_hits?(hit)
        label = @msgs.empty? ? 'SeparatedHit' : 'AndSeparatedHit'
        @msgs << [label, @final_hit.q_end + 1, hit.q_beg + 1]
        separated_hits(hit)
      end
    end
  end

  # Extends the reference hit up to the end of +hit+ when both are in the same
  # frame, retranslating the current query span of the reference hit.
  def same_frame_hits_query(hit)
    @final_hit.q_seq = @output_seq[@final_hit.q_beg..@final_hit.q_end].translate
    @final_hit.q_end = hit.q_end
    @final_hit.s_end = hit.s_end
    @final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
  end

  # Merges +hit+ into the reference hit when both overlap on the query: the
  # overlapped nucleotides are masked with 'n' (plus padding to fix the
  # frame-shift) and the merged query span is retranslated.
  def overlapped_hits_query(hit)
    overlapped_nts = @final_hit.q_end - hit.q_beg + 1
    add_nt = ajust_nt(hit.q_frame - 1) # Fix frame-shift

    @output_seq = @output_seq[0..@final_hit.q_end - overlapped_nts] +
                  'n' * (overlapped_nts + add_nt) +
                  @output_seq[@final_hit.q_end + 1..-1]

    @final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end + add_nt].translate
    @final_hit.q_end = hit.q_end + add_nt
    @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
    @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
    @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg + 1
    @final_hit.q_len = @output_seq.length
  end

  # Merges +hit+ into the reference hit when they are separated on the query,
  # masking with 'n' and retranslating the merged query span.
  def separated_hits_query(hit)
    separated_nts = hit.q_beg - @final_hit.q_end + 1
    add_nt = ajust_nt(separated_nts) # Fix frame-shift

    @output_seq = @output_seq[0..@final_hit.q_end - separated_nts] +
                  'n' * (separated_nts + add_nt) +
                  @output_seq[@final_hit.q_end + 1..-1]

    @final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end + add_nt].translate
    @final_hit.q_end = hit.q_end + add_nt
    @final_hit.s_end = hit.s_end
    @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg + 1
    @final_hit.q_len = @output_seq.length
  end

  # Joins the aligned sequences of +hit+ to the reference hit, filling the
  # subject gap between them with 'x' (and the query with 'n', 3 per residue).
  def same_frame_hits(hit)
    add = (hit.s_beg - @final_hit.s_end) + 1
    nt_add = add * 3

    @final_hit.q_seq = @final_hit.q_seq + 'x' * add + hit.q_seq
    @final_hit.s_seq = @final_hit.s_seq + 'x' * add + hit.s_seq
    @output_seq = @output_seq[0..@final_hit.q_end - nt_add] +
                  'n' * nt_add +
                  @output_seq[hit.q_beg + 1..-1]

    @final_hit.q_end = hit.q_end
    @final_hit.s_end = hit.s_end
    @final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
  end

  # Merges +hit+ into the reference hit at aligned-sequence level when both
  # overlap on the query.
  #
  # Original author's note: this breaks when there are HSPs in different parts
  # of the query that are the same on the subject, so those are removed
  # beforehand.
  def overlapped_hits(hit)
    overlapped_aas = @final_hit.s_end - hit.s_beg + 1
    overlapped_nts = @final_hit.q_end - hit.q_beg + 1

    # 1 => the hits also overlap on the subject, so the overlapped residues
    #      are cut from both aligned sequences before joining them.
    # 0 => query overlap without subject overlap: nothing is cut.
    absolute_overlap = 1
    if overlapped_aas < 0
      # Only the sign of the subject overlap is inspected here. Slicing q_seq
      # with a negative overlap would start past the end of the string and
      # return nil, so no intermediate slice must be taken before this point.
      overlapped_aas = overlapped_aas.abs
      absolute_overlap = 0
    end

    add_nt = overlapped_nts + ajust_nt(hit.q_frame - 1)
    @output_seq = @output_seq[0..@final_hit.q_end - overlapped_nts] +
                  'n' * add_nt +
                  @output_seq[@final_hit.q_end + 1..-1]

    # q_seq and s_seq are aa sequences
    cut_aas = overlapped_aas * absolute_overlap
    # NOTE(review): when cut_aas >= q_seq.length this bound is negative and
    # counts from the end of the string -- confirm callers never reach that.
    final_hit_upper_bound = @final_hit.q_seq.length - 1 - cut_aas
    filler = 'x' * overlapped_aas

    @final_hit.q_seq = @final_hit.q_seq[0..final_hit_upper_bound] + filler + hit.q_seq[cut_aas..-1]
    @final_hit.s_seq = @final_hit.s_seq[0..final_hit_upper_bound] + filler + hit.s_seq[cut_aas..-1]

    @final_hit.q_end = hit.q_end
    @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
    @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
    @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg + 1
  end

  # Merges +hit+ into the reference hit when +hit+ starts after the reference
  # ends on the query. The gap length is rounded up to a multiple of 3; when it
  # is positive, that many 'n' are inserted in the query and one 'x' per three
  # nucleotides in the aligned sequences.
  def separated_hits(hit)
    number_x = hit.q_beg - @final_hit.q_end - 1
    number_x += ajust_nt(number_x)

    num_x = ''
    num_x_aa = ''
    if number_x > 0
      num_x = 'n' * number_x
      num_x_aa = 'x' * (number_x / 3)
    end

    @output_seq = @output_seq[0..@final_hit.q_end - 1] + num_x + @output_seq[hit.q_beg - 1..-1]
    @final_hit.q_seq = @final_hit.q_seq + num_x_aa + hit.q_seq
    @final_hit.s_seq = @final_hit.s_seq + num_x_aa + hit.s_seq

    @final_hit.q_end = hit.q_end
    @final_hit.s_end = hit.s_end
    @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg + 1
  end

  # Builds an array holding only the HSPs whose accession equals that of the
  # first HSP. For every selected HSP with a negative query frame the query is
  # replaced by reverse_seq(query_fasta_ori, h) and the HSP is flagged as
  # reversed (reverse_seq is not defined in this file; presumably it comes
  # from CommonFunctions and returns the reverse complement -- verify).
  #
  # Returns [selected_hsps, query_sequence]; the query is a copy of
  # +query_fasta_ori+ when no HSP had to be reversed.
  def hits_misma_id(full_hit, query_fasta_ori)
    misma_id = []
    query_fasta = query_fasta_ori.dup

    full_hit.each do |h|
      next unless h.acc == full_hit.first.acc

      if h.q_frame < 0 # the sequence is reversed, so turn it around
        query_fasta = reverse_seq(query_fasta_ori, h)
        h.reversed = true
      end
      misma_id << h
    end

    [misma_id, query_fasta]
  end

  # True when +hit+ starts at or before the end of the reference hit on the
  # query and ends after it.
  def overlapping_hits?(hit)
    @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end
  end

  # True when +hit+ starts and ends after the end of the reference hit on the
  # query.
  def separated_hits?(hit)
    @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
  end

  # Returns the number of nt necessary to keep the ORF, i.e. how many must be
  # added to +nt+ for it to become a multiple of 3 (0, 1 or 2).
  def ajust_nt(nt)
    case nt % 3
    when 1 then 2
    when 2 then 1
    else 0
    end
  end
end