# Sequence-matching and coordinate helpers meant to be mixed into other
# classes. In every matching routine the character '-' is treated as a
# wildcard (gap) that is compatible with any other character.
module CommonFunctions

  # Locates where +key_seq+ starts inside +full_prot+.
  #
  # Both strings are normalised first: every '-', 'X' and 'x' becomes '-'.
  # The search then falls back through progressively looser strategies:
  #   1. exact substring match;
  #   2. match_with_ungapped_reference (anchor on a gap-free piece of key_seq);
  #   3. only when full_prot contains gaps: a gap-tolerant scan at offset 0
  #      and at offset (full_prot.length - key_seq.length), then
  #      match_with_gapped_reference.
  #
  # key_seq::   String, the sequence to locate.
  # full_prot:: String, the reference sequence searched in.
  #
  # Returns an Integer index into full_prot; 0 when no strategy succeeds.
  def contenidos_en_prot(key_seq, full_prot)
    full_prot = full_prot.gsub(/[\-Xx]/,'-')
    compare_prot = key_seq.gsub(/[\-Xx]/,'-')

    # Full match between hit.q_seq and full_prot (unigene)
    q_index_start = full_prot.index(compare_prot)
    return q_index_start unless q_index_start.nil?

    # Gaps prevent the full match
    q_index_start = match_with_ungapped_reference(full_prot, compare_prot)
    if q_index_start.nil? && full_prot.include?('-')
      ref_chars = full_prot.split('')
      cmp_chars = compare_prot.split('')
      diff = full_prot.length - compare_prot.length

      # compare_prot aligned with the beginning of full_prot
      if scan_sequences(ref_chars, cmp_chars) == compare_prot.length
        q_index_start = 0
      end
      # compare_prot aligned with the end of full_prot; when both alignments
      # succeed this later assignment wins
      if diff > 0 && scan_sequences(ref_chars, cmp_chars, diff) == compare_prot.length
        q_index_start = diff
      end
      if q_index_start.nil?
        q_index_start = match_with_gapped_reference(full_prot, compare_prot)
      end
    end

    q_index_start.nil? ? 0 : q_index_start
  end

  # Anchors the match on the gap-free fragments of +full_prot+.
  #
  # full_prot is split on runs of '-'. Fragments of 4 characters or fewer are
  # skipped. For each longer fragment, the whole fragment is searched in
  # compare_prot; if absent, its first 5 characters are tried instead. The
  # first fragment that hits is located in full_prot and, unless it is the
  # first fragment (index 0), the match is extended leftwards with
  # extend_match.
  #
  # Returns the Integer start index in full_prot, or nil when nothing matches.
  def match_with_gapped_reference(full_prot, compare_prot)
    q_index_start = nil
    full_prot.split(/\-+/).each_with_index do |seq, i|
      next unless seq.length > 4

      compare_prot_index = compare_prot.index(seq)
      if compare_prot_index.nil?
        # In cases that no match by gaps: retry with a 5-character seed
        seq = seq[0..4]
        compare_prot_index = compare_prot.index(seq)
      end
      next if compare_prot_index.nil?

      q_index_start = full_prot.index(seq)
      if i > 0
        # Only the adjusted position in full_prot is needed here
        q_index_start = extend_match(full_prot, compare_prot, q_index_start, compare_prot_index).first
      end
      break
    end
    return q_index_start
  end

  # Extends a match leftwards from a pair of anchored positions.
  #
  # The text before q_index_start in full_prot and the text before
  # compare_prot_index in compare_prot are both reversed and compared with
  # scan_sequences; both indexes are moved back by the number of compatible
  # characters found.
  #
  # Exclusive ranges are used on purpose: with an inclusive `0..idx-1`, an
  # index of 0 becomes `0..-1`, which Ruby interprets as the WHOLE string
  # rather than an empty prefix, producing a bogus extension.
  #
  # Returns [q_index_start, compare_prot_index] after the extension.
  def extend_match(full_prot, compare_prot, q_index_start, compare_prot_index)
    full_prot_prefix = full_prot[0...q_index_start].reverse.split('')
    compare_prot_prefix = compare_prot[0...compare_prot_index].reverse.split('')
    extension = scan_sequences(full_prot_prefix, compare_prot_prefix)
    q_index_start -= extension
    compare_prot_index -= extension
    return q_index_start, compare_prot_index
  end

  # Counts how many consecutive characters are compatible between two
  # character arrays.
  #
  # ref_seq is walked starting at index +diff+ while compare_seq is always
  # walked from its first element. Two characters are compatible when they
  # are equal or when either of them is '-'. The walk stops at the first
  # incompatible pair or when compare_seq is exhausted.
  #
  # ref_seq::     Array of single-character Strings.
  # compare_seq:: Array of single-character Strings.
  # diff::        Integer, number of leading ref_seq elements to skip.
  #
  # Returns the Integer count of compatible characters.
  def scan_sequences(ref_seq, compare_seq, diff = 0)
    matched = 0
    ref_seq.each_with_index do |char, i|
      next if i < diff
      compare_char = compare_seq[matched]
      break if compare_char.nil?
      break if char != compare_char && char != '-' && compare_char != '-'
      matched += 1
    end
    return matched
  end

  # Anchors the match on a gap-free fragment of +compare_prot+.
  #
  # compare_prot is split on runs of '-' and the FIRST fragment longer than
  # 4 characters is searched in full_prot. When that fragment is not the
  # first one (index > 0) and it was found, the position is corrected with
  # refine_match.
  #
  # NOTE(review): the loop stops after the first fragment longer than 4
  # characters even when it is not found, so later fragments are never tried.
  # This mirrors the original behaviour; confirm it is intended.
  #
  # Returns the Integer start index in full_prot, or nil.
  def match_with_ungapped_reference(full_prot, compare_prot)
    q_index_start = nil
    compare_prot.split(/\-+/).each_with_index do |seq, i|
      next unless seq.length > 4

      q_index_start = full_prot.index(seq)
      if i > 0 && !q_index_start.nil?
        # Correction if first seq isn't large enough
        q_index_start = refine_match(seq, compare_prot, q_index_start)
      end
      break
    end
    return q_index_start
  end

  # Shifts +q_index_start+ back by the number of non-gap characters that
  # precede +subseq+ inside +seq+.
  #
  # NOTE(review): nothing here prevents the result from becoming negative
  # when more non-gap characters precede subseq than q_index_start allows.
  #
  # Returns the corrected Integer index.
  def refine_match(subseq, seq, q_index_start)
    location_seq = seq.index(subseq)
    gaps_on_location = seq[0..location_seq].count('-')
    # Correction if first seq isn't large enough
    q_index_start -= location_seq - gaps_on_location
    return q_index_start
  end

  # Flips a hit and its query sequence to the opposite strand.
  #
  # Mutates +hit+ in place: q_frame is negated, q_beg and q_end are each
  # replaced by (query_fasta.length - 1 - value), and reversed is set to true.
  # When the hit's class name is 'ExoBlastHit', every [position, num_nts]
  # pair in hit.q_frameshift has its position mirrored the same way.
  #
  # NOTE(review): q_beg and q_end are mirrored but not swapped — confirm that
  # callers expect this.
  #
  # Returns the result of query_fasta.complementary_dna (defined outside this
  # module).
  def reverse_seq(query_fasta, hit)
    last_index = query_fasta.length - 1
    hit.q_frame = -hit.q_frame
    hit.q_end = last_index - hit.q_end
    hit.q_beg = last_index - hit.q_beg
    hit.reversed = true
    # Original author's note: this call actually produces the reverse complement.
    query_fasta = query_fasta.complementary_dna
    if hit.class.to_s == 'ExoBlastHit'
      hit.q_frameshift.map!{|position, num_nts|
        reversed_position = query_fasta.length - 1 - position
        [reversed_position, num_nts]
      }
    end
    return query_fasta
  end

  # Offsets a start/end pair according to the frame.
  #
  # Frames with absolute value 2 add 1 to both coordinates, frames with
  # absolute value 3 add 2; any other frame leaves them untouched.
  #
  # Returns [ref_start, ref_end].
  def corrige_frame(ref_frame,ref_start,ref_end)
    case ref_frame.abs
    when 2
      ref_start += 1
      ref_end += 1
    when 3
      ref_start += 2
      ref_end += 2
    end
    return [ref_start,ref_end]
  end

  # Returns the remainder of the hit's query span length
  # (q_end - q_beg + 1) divided by 3; 0 means the span is a whole number of
  # triplets.
  def check_frame_shift(hit)
    prot_length_in_nts = hit.q_end - hit.q_beg + 1
    return prot_length_in_nts % 3
  end
end