require 'common_functions'
include CommonFunctions

# Joins the hits of one query that share the accession of the query's first
# hit into a single combined hit (+final_hit+), padding the nucleotide
# sequence with 'x' wherever two hits lie in different frames.
#
# NOTE(review): +contenidos_en_prot+, +reverse_seq+ and String#translate are
# not defined in this file; presumably they come from CommonFunctions or a
# sequence library loaded elsewhere -- confirm.
class UneLosHit
  # output_seq    - query nucleotide sequence after 'x' padding
  # final_hit     - copy of the first hit, extended over every merged hit
  # msgs          - accumulated warning/error text ('' when nothing was reported)
  # number_x      - number of 'x' nucleotides inserted by the last merge
  # wrong_seq     - true when the query has hits in both strand senses
  # is_ok, q_index_start - the pair returned by contenidos_en_prot
  # full_prot     - translation of output_seq starting at final_hit's frame
  attr_reader :output_seq, :final_hit, :msgs, :number_x, :wrong_seq, :is_ok, :q_index_start, :full_prot

  # q                - query object; must respond to +hits+
  # query_fasta      - nucleotide sequence of the query
  # pident_threshold - hits whose +ident+ is below this value are ignored
  def initialize(q, query_fasta, pident_threshold)
    mismas_ids_array, query_fasta, wrong_seq = hits_misma_id(q, query_fasta)
    @wrong_seq = wrong_seq
    @mismas_ids_array = mismas_ids_array
    @msgs = ''
    @number_x = 0
    @output_seq = query_fasta

    several_hits = mismas_ids_array.count > 1
    # Hits are processed in order of their start position on the query.
    mismas_ids_array.sort! { |h1, h2| h1.q_beg <=> h2.q_beg } if several_hits
    @final_hit = mismas_ids_array[0].dup

    if several_hits
      mismas_ids_array.each do |hit|
        next unless hit.ident >= pident_threshold
        next if same_hit(hit)

        merge_hit(hit, q)
      end
    end

    @full_prot = @output_seq[@final_hit.q_frame - 1, @output_seq.length + 1].translate
    @is_ok, @q_index_start = contenidos_en_prot(@final_hit, @full_prot, q)
  end

  # Returns true when +hit+ has the same score and the same query/subject
  # coordinates as the current final hit.
  def same_hit(hit)
    hit.score == @final_hit.score &&
      hit.q_beg == @final_hit.q_beg &&
      hit.q_end == @final_hit.q_end &&
      hit.s_beg == @final_hit.s_beg &&
      hit.s_end == @final_hit.s_end
  end

  # Merges a hit that lies in the same frame as the final hit: only the
  # coordinates are extended, the sequences are left untouched.
  def same_frame_hits(hit)
    absorb_hit_range(hit)
  end

  # Merges a hit in a different frame whose query range overlaps the end of
  # the final hit. The overlapping stretch is replaced with 'x' in both the
  # nucleotide sequence and the aligned sequences.
  def overlapped_hits(hit, correccion_x, q)
    if @msgs.empty?
      @msgs = "Overlapping hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
    else
      @msgs += " and overlapping frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
    end

    # Overlap measured in nucleotides on the query (rounded up to whole
    # codons) and in residues on the subject.
    overlapped_nt = round_up_to_codon(@final_hit.q_end - hit.q_beg + 1)
    overlapped_aas = @final_hit.s_end - hit.s_beg + 1

    # Number of 'x' to add.
    @number_x = ((correccion_x + overlapped_nt) / 3 + 1) * 3
    @number_x_aa = overlapped_aas

    num_x = ''
    if @number_x.to_i > 0
      num_x = 'x' * @number_x.to_i
    elsif @number_x.to_i < 0
      @msgs = "ERROR#2 unexpected negative index in x_number, "
    end

    # A negative subject overlap means the subject ranges are actually apart.
    num_x_aa = 'x' * @number_x_aa.to_i.abs
    @msgs = "Warning!, your query overlaps and the subject is separated, " if @number_x_aa.to_i < 0

    # Nothing is trimmed from the alignments when the subject ranges do not overlap.
    trim = [overlapped_aas, 0].max
    @final_hit.q_seq = splice_alignment(@final_hit.q_seq, hit.q_seq, trim, num_x_aa)
    @final_hit.s_seq = splice_alignment(@final_hit.s_seq, hit.s_seq, trim, num_x_aa)

    output_seq_tmp = "#{@output_seq[0..(@final_hit.q_end - overlapped_nt)]}#{num_x}#{@output_seq[(hit.q_beg + overlapped_nt)..(@output_seq.length)]}"
    full_prot_tmp = output_seq_tmp[@final_hit.q_frame - 1, output_seq_tmp.length + 1].translate
    # The result is discarded; the call is kept in case the helper has side effects.
    contenidos_en_prot(@final_hit, full_prot_tmp, q)

    @output_seq = output_seq_tmp
    absorb_hit_range(hit)
  end

  # Merges a hit in a different frame that starts after the final hit ends on
  # the query. The gap is filled with 'x', rounded up to whole codons.
  #
  # +correccion_x+ is accepted for signature parity with overlapped_hits but
  # is currently unused (a variant adding it to the gap was left disabled,
  # together with a note to retry it modulo 3).
  def separated_hits(hit, correccion_x, q)
    if @msgs.empty?
      @msgs = "Separated hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
    else
      @msgs += " and possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
    end

    @number_x = round_up_to_codon(hit.q_beg - @final_hit.q_end - 1)

    # Default both paddings to '' so that a negative gap only records the
    # error message instead of raising TypeError on the concatenation below.
    num_x = ''
    num_x_aa = ''
    if @number_x.to_i > 0
      num_x = 'x' * @number_x.to_i
      num_x_aa = 'x' * (@number_x.to_i / 3)
    elsif @number_x.to_i < 0
      @msgs = "ERROR#2 unexpected negative index in x_number"
    end

    @output_seq = @output_seq[0..@final_hit.q_end - 1] + num_x + @output_seq[hit.q_beg - 1..@output_seq.length - 1]

    @final_hit.score += 1
    @final_hit.q_end = hit.q_end
    @final_hit.s_end = hit.s_end
    @final_hit.q_seq = "#{@final_hit.q_seq}#{num_x_aa}#{hit.q_seq}"
    @final_hit.s_seq = "#{@final_hit.s_seq}#{num_x_aa}#{hit.s_seq}"

    full_prot_tmp = @output_seq[@final_hit.q_frame - 1, @output_seq.length + 1].translate
    contenidos_en_prot(@final_hit, full_prot_tmp, q)
  end

  # Builds an array holding only the hits that share the accession of the
  # first hit (the one with the best score or e-value).
  #
  # Hits on the reverse strand are converted with +reverse_seq+ and flagged as
  # reversed. Hits whose strand sense differs from the first hit's are dropped
  # and reported through the +wrong_seq+ flag.
  #
  # Returns [same_id_hits, query_fasta, wrong_seq].
  def hits_misma_id(q, query_fasta_ori)
    wrong_seq = false
    misma_id = []
    query_fasta = query_fasta_ori.dup
    best_hit = q.hits[0]
    # Captured up front because the first hit's frame may be rewritten below.
    frame_ori = best_hit.q_frame

    q.hits.each do |h|
      next unless h.acc == best_hit.acc

      # Sense and antisense hits must not be mixed for one query.
      if (frame_ori < 0 && h.q_frame > 0) || (frame_ori > 0 && h.q_frame < 0)
        wrong_seq = true
        next
      end

      if h.q_frame.to_i < 0
        # The sequence is reversed, so turn it around.
        query_fasta, h.q_frame, h.q_beg, h.q_end = reverse_seq(query_fasta_ori, h.q_frame, h.q_beg, h.q_end)
        h.reversed = true
      end
      misma_id.push h
    end

    [misma_id, query_fasta, wrong_seq]
  end

  private

  # Chooses how +hit+ is combined with the final hit: same frame, overlapping
  # on the query, separated on the query, or not mergeable at all.
  def merge_hit(hit, q)
    return same_frame_hits(hit) if @final_hit.q_frame == hit.q_frame

    correccion_x = frame_correction(@final_hit.q_frame - hit.q_frame)
    # The new hit must reach more than 15 positions past the current end.
    extends_query = hit.q_end > @final_hit.q_end + 15

    if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && extends_query
      overlapped_hits(hit, correccion_x, q)
    elsif @final_hit.q_end < hit.q_beg && extends_query
      separated_hits(hit, correccion_x, q)
    else
      @msgs = 'warning!, putative chimeric sequence! or repetitive structure'
    end
  end

  # Frame correction used when the nucleotide sequence continues in another
  # frame: 1 for a difference of 1 or -2, 2 for 2 or -1, otherwise 0.
  def frame_correction(frame_diff)
    case frame_diff
    when 1, -2 then 1
    when 2, -1 then 2
    else 0
    end
  end

  # Rounds +length+ up to the next multiple of three.
  def round_up_to_codon(length)
    case length % 3
    when 1 then length + 2
    when 2 then length + 1
    else length
    end
  end

  # Extends the final hit's query end and subject range to cover +hit+.
  def absorb_hit_range(hit)
    @final_hit.q_end = hit.q_end
    @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
    @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
  end

  # Joins two aligned sequences, dropping +trim+ residues from the end of
  # +left+ and from the start of +right+ and inserting +gap+ between them.
  def splice_alignment(left, right, trim, gap)
    "#{left[0..left.length - 1 - trim]}#{gap}#{right[trim..right.length]}"
  end
end