require 'common_functions' require 'scbi_plot' include CommonFunctions class TestCode def initialize(seq) name='' t_code='' status='' ref_start=0 ref_end=seq.seq_fasta.length ref_frame='' orf='' protein = '' p_long = 0 if (seq.fasta_length < 200) ref_name = seq.seq_name ref_code = 0.0 ref_frame = 0 ref_status = 'unknown' ref_orf = '' ref_msgs = 'Sequence length < 200 nt' seq.annotate(:tcode_unknown,"#{ref_name}\t#{seq.fasta_length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true) # seq.annotate(:tcode,"#{ref_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true) else # para probar tescode con toda la secuencia, en lugar de con los ORFs ---------------------------------------------------------------------- # sense_strand = seq.seq_fasta.upcase # antisense_strand = sense_strand.complementary_dna # (t_code,t_status) = testCode_exec(sense_strand) # ref_frame = 1 # ref_msgs = '' # (as_t_code,as_t_status) = testCode_exec(antisense_strand) # # puts "#{seq.seq_name}: t_code: #{t_code}, t_status: #{t_status}, as_t_code: #{as_t_code}, as_t_status: #{as_t_status}" # seq.annotate(:tcode,"#{seq.seq_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{t_status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true) # -------------------------------------------------------------------------------------------------------------------------------- # see add_region filter (name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame) = t_code(seq) if (status == :unknown) seq.annotate(:tcode_unknown,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true) else seq.annotate(:tcode,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true) end # if (ref_msgs.nil?) # ref_msgs = '' # end # # if (stop_before_start) # ref_msgs += "There is a STOP codon before ATG. " # end # # if (more_than_one_frame) # ref_msgs += "Possible frame error by an ins/del" # end # # if (status.to_s =~ /^[CN]\_terminus/) || (status == :Internal) # tmp_status = "Putative #{status.to_s}" # else # tmp_status = status.to_s # end # # # if (!orf.nil?) && (!more_than_one_frame) # protein = orf.translate # p_long = protein.length - 3 # seq.annotate(:tcode,"#{name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{tmp_status}\t#{t_code}\t\t\t#{p_long}\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t#{protein}",true) # else # seq.annotate(:tcode,"#{name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{tmp_status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true) # end end end def t_code(seq) name = seq.seq_name tc_fasta = seq.seq_fasta # generamos todos los ORFs de cada secuencia uncomplete_orf_finder(seq) minus_strand = [] plus_strand = [] # puts "**************************************************" # puts "#{name} #{tc_fasta.length}" # ordenamos los ORFs empezando desde 5' a 3' y luego separamos los de cada hebra en un array distinto seq.orfs.sort{|x,y| x.t_start <=> y.t_start }.each do |one_orf| (t_code,t_status)=testCode_exec(one_orf.seq) one_orf.status = t_status one_orf.score = t_code if (one_orf.frame < 0) minus_strand.push one_orf elsif (one_orf.frame > 0) plus_strand.push one_orf end end #----------------------------------- Plus strand if (!plus_strand.empty?) # puts "--------------Plus strand:" best_plus_region = compare_regions(plus_strand,tc_fasta) if (!best_plus_region.nil?) # puts "#{best_plus_region.seq.length}, #{best_plus_region.status}, #{best_plus_region.type}, #{best_plus_region.t_start} - #{best_plus_region.t_end}, #{best_plus_region.frame}" # puts best_plus_region.seq end end #----------------------------------- Minus strand if (!minus_strand.empty?) # puts "--------------Minus strand:" best_minus_region = compare_regions(minus_strand,tc_fasta) if (!best_minus_region.nil?) # puts "#{best_minus_region.seq.length}, #{best_minus_region.status}, #{best_minus_region.type}, #{best_minus_region.t_start} - #{best_minus_region.t_end}, #{best_minus_region.frame}" # puts best_minus_region.seq end end # obtenemos la region codificante mas larga de ambas hebras best_region = nil if (!best_plus_region.nil?) if (!best_minus_region.nil?) if (best_minus_region.seq.length > best_plus_region.seq.length) best_region = best_minus_region else best_region = best_plus_region end else best_region = best_plus_region end elsif (!best_minus_region.nil?) best_region = best_minus_region end # puts "--------------Best region:" # puts " --------------------------------- #{best_region.seq.length}, #{best_region.status}, #{best_region.type}, #{best_region.t_start} - #{best_region.t_end}, #{best_region.frame}" # comprobamos el tipo de ORF segun si tiene un codon de parada antes del atg if (!best_region.nil?) && (best_region.seq.length >= 200) if (best_region.type == :Complete) && (!best_region.stop_codon) best_region.type = 'Putative Complete' ref_msgs = 'NO STOP codon before ATG. ' elsif (best_region.type == :N_terminus) && (!best_region.stop_codon) ref_msgs = 'NO STOP codon before ATG. ' end return [name, best_region.score, best_region.type, best_region.t_start, best_region.t_end, best_region.frame, best_region.seq, ref_msgs, best_region.stop_codon, best_region.more_than_one_frame] else ref_score = 0.0 ref_start = 0 ref_end = 0 ref_frame = 0 ref_orf = '' ref_type = 'unknown' ref_msgs = 'Non coding ORF found >= 200 nt ' return [name, ref_score, ref_type, ref_start, ref_end, ref_frame, ref_orf, ref_msgs, false, false] end end # cuando se unen dos regiones de diferentes frames, nos dice de que tipo es la union, Complete, Internal, N-terminus... def type_fusion(prev_type,one_type) res_type = :Internal if (prev_type == :C_terminus) #-------------- C-terminus if (one_type == :N_terminus) res_type = :Internal elsif (one_type == :Internal) res_type = :Internal elsif (one_type == :Complete) res_type = :C_terminus end elsif (prev_type == :N_terminus) #-------------- N-terminus if (one_type == :C_terminus) res_type = :Complete elsif (one_type == :Internal) res_type = :N_terminus elsif (one_type == :Complete) res_type = :Complete end elsif (prev_type == :Internal) #-------------- Internal if (one_type == :C_terminus) res_type = :C_terminus elsif (one_type == :N_terminus) res_type = :Internal elsif (one_type == :Complete) res_type = :C_terminus end elsif (prev_type == :Complete) #-------------- Complete if (one_type == :C_terminus) res_type = :Complete elsif (one_type == :N_terminus) res_type = :N_terminus elsif (one_type == :Internal) res_type = :N_terminus end end return res_type end # para escoger la region codificante mas grande, incluso solapando verios frames def compare_regions(this_strand,tc_fasta) # select the largest ORF without frame repair-------------------------------- hay que comentar todo el bloque de arriba best_orf = nil best_orf = this_strand.sort{|x,y| y.seq.length <=> x.seq.length }[0] if !best_orf.nil? best_orf.type = best_orf.status end return best_orf end # to add regions over a determinated size def add_region(orf_seq, orf_t_start, orf_t_end, orf_frame, orf_stop_codon, orf_type,seq) if (orf_seq.length >= 200) # puts "#{seq.seq_name}, #{orf_t_start} - #{orf_t_end}, #{orf_frame}, #{orf_type}" seq.add_orf(orf_seq, orf_t_start, orf_t_end, orf_frame, orf_stop_codon, orf_type) end end # se buscan las regiones Complete, N-terminus, C-terminus e Internal en la secuencia de cada frame def generate_uncomplete_orf(a,frame,seq) my_atg = true atg_codon = false stop_codon = false any_stop = false orf ='' t_start = 0 t_end = 0 a.each do |e| t_end += 3 orf += e if (e == 'ATG') && (!atg_codon) atg_codon = true stop_codon = false t_start = t_end - 3 elsif (e == 'TAG') || (e == 'TGA') || (e == 'TAA') orf_tmp = orf[t_start..t_end] if (any_stop) # case 1, complete orf if (atg_codon) orf_tmp = orf[t_start..t_end] add_region(orf_tmp, t_start, t_end, frame, any_stop, :Complete,seq) end else # case 2, C_terminus if (my_atg) orf_tmp = orf[0..t_end] add_region(orf_tmp, 0, t_end, frame, any_stop, :C_terminus,seq) # case 3, putative complete, complete without stop codon before atg if (atg_codon) orf_tmp = orf[t_start..t_end] add_region(orf_tmp, t_start, t_end, frame, any_stop, :Complete, seq) end end end stop_codon = true any_stop = true my_atg = false atg_codon = false t_start += 3 end end # case 4, N_terminus and case 6, putative N_terminus if (atg_codon) && (!stop_codon) orf_tmp = orf[t_start..t_end] add_region(orf_tmp, t_start, t_end, frame, any_stop, :N_terminus,seq) end # case 5, internal if (my_atg) && (!any_stop) orf_tmp = orf[0..t_end] add_region(orf_tmp, 0, t_end, frame, any_stop, :Internal,seq) end end # recorre cada uno de los frames y los pasa a generate_uncomplete_orf def uncomplete_orf_finder(seq) s = seq.seq_fasta.upcase f1 = s.split('').each_slice(3).map{|e| e.join} generate_uncomplete_orf(f1,1,seq) s.sub!(/^./,'') f2 = s.split('').each_slice(3).map{|e| e.join} generate_uncomplete_orf(f2,2,seq) s.sub!(/^./,'') f3 = s.split('').each_slice(3).map{|e| e.join} generate_uncomplete_orf(f3,3,seq) # vamos a por los ORFs de la cadena complementaria s = seq.seq_fasta.upcase s = s.complementary_dna f4 = s.split('').each_slice(3).map{|e| e.join} generate_uncomplete_orf(f4,-1,seq) s.sub!(/^./,'') f5 = s.split('').each_slice(3).map{|e| e.join} generate_uncomplete_orf(f5,-2,seq) s.sub!(/^./,'') f6 = s.split('').each_slice(3).map{|e| e.join} generate_uncomplete_orf(f6,-3,seq) end # | = ATG # * = STOP # # -------*---|>>>>>>>>>>>>>> 1 >>>>>>>>>>>>*------------- # >>>>>>>>>> 2 >>>>>>>>>>>>>*---------------------------- # ---|>>>>>>>> 3 >>>>>>>>>>>>*--------------------------- # -------------------*-----------------|>>>>>>>>>> 4 >>>> # >>>>>>>>>>>>>>>>>>>>>>>>>> 5 >>>>>>>>>>>>>>>>>>>>>>>>>> #--------------------------- 6 ----------|>>>>>>>>>>>>>>> # # 1 complete orf, with stop codon before atg # 2 C-terminus # 3 putative complete, complete without stop codon before atg # 4 N_terminus, with stop codon before atg # 5 internal # 6 putative N_terminus, N_terminus without stop codon before atg # no se usa def orf_fusion(orfs_array,tc_fasta,name,ref_status,ref_code,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) lower_start = 9999 higher_end = 0 new_orf = false ref_name = name # vamos a coger el mejor de referencia (el coding mas largo) # y vamos a poner en el warning los coding que estan en el mismo sentido y no estan contenidos en el mejor orfs_array.sort! {|orf1,orf2| orf1[1] <=> orf2[1]} tmp_orf = ref_orf tmp_start = ref_start tmp_end = ref_end tmp_frame = ref_frame tmp_msg = '' # puts "\n\n#{name} ---- tmp_frame: #{tmp_frame} ------------------------ \n\n" orfs_array.each do |orf| (orf[1],orf[2]) = corrige_frame(orf[3],orf[1],orf[2]) # (orf[1],orf[2]) = $an.corrige_frame(orf[3],orf[1],orf[2]) # puts "*** name: #{name}, orf[1]: #{orf[1]}, orf[2]: #{orf[2]}, orf[3]: #{orf[3]} ref_start: #{ref_start}, ref_end: #{ref_end}, ref_frame: #{ref_frame} ***\n\n" if (orf[0] != tmp_orf) if ((tmp_end >= orf[1]) && (tmp_end <= orf[2])) || ((tmp_start >= orf[1]) && (tmp_start <= orf[2])) # los ORFs solapan # puts "SOLAPAN frame: #{orf[3]} tmp_start: #{tmp_start} tmp_end: #{tmp_end} orf_start: #{orf[1]} orf_end: #{orf[2]}" if (tmp_frame > 0) tmp_msg = ", overlapping coding region (#{orf[1]},#{orf[2]})" elsif (tmp_frame < 0) tmp_msg = ", overlapping coding region (-#{orf[1]},-#{orf[2]})" end new_orf = true elsif (tmp_end < orf[1]) || (tmp_start > orf[2]) # los ORFs estan separados # puts "#{name} frame: #{orf[3]} SEPARADOS --> tmp_start: #{tmp_start} tmp_end: #{tmp_end} orf_start: #{orf[1]} orf_end: #{orf[2]}" if (tmp_frame > 0) tmp_msg = ", other coding region (#{orf[1]},#{orf[2]})" elsif (tmp_frame < 0) tmp_msg = ", other coding region (-#{orf[1]},-#{orf[2]})" end new_orf = true end if (new_orf == true) if (orf[1] < lower_start) lower_start = orf[1] end # if (orf[2] > higher_end) # end (tmp_code,tmp_status)=testCode_exec(tc_fasta[lower_start-1..higher_end-1]) if (tmp_status != 'unknown') # puts "#{name} ----------------------------------- FUSION!!!!!!!!!!!!!!!!\n\n" # tenemos varios ORFs q son codificantes al unirlos ref_msgs += tmp_msg new_orf = false else # puts "#{name} ------------------------------------NO se unen\n\n" end end end end return[ref_status,ref_code,ref_name,ref_start,ref_end,ref_frame,ref_orf,ref_msgs] end # no se usa def t_code_old(seq) ref_code = 0.0 ref_name = '' ref_start = 0 ref_end = 0 ref_frame = 0 ref_status = '' ref_orf = '' ref_msgs = '' name = seq.seq_name tc_fasta = seq.seq_fasta # generamos todos los ORFs mayores de 200pb de cada secuencia, en fl_string_utils orfs_array = tc_fasta.orf_finder if (orfs_array[0].nil?) ref_name = name ref_code = 0.0 ref_start = 0 ref_end = 0 ref_frame = 0 ref_status = 'unknown' ref_orf = '' ref_msgs = 'ORF length < 200 nt' # ref_msgs = 'Your sequence has not an ORF longer than 200 nt' else one_good_orf_minus = false more_than_one_minus = false one_good_orf_plus = false more_than_one_plus = false good_orfs_minus = [] good_orfs_plus = [] one_coding = false one_putative = false # orfs_array.sort! {|orf1,orf2| (orf1[2] - orf1[1]) <=> (orf2[2] - orf2[1])} orfs_array.each do |orf| # long = orf.length - 1 if (orf[0]) # if (long >= 200) (t_code,t_status)=testCode_exec(orf[0]) # puts "name: #{name},t_status: #{t_status}, t_code: #{t_code}, stop_codon: #{orf[4]}, length: #{tc_fasta.length}, start: #{orf[1]}, end: #{orf[2]}, frame: #{orf[3]}\n\n" if (t_status != 'unknown') if (orf[3].to_i < 0) if (one_good_orf_minus == true) more_than_one_minus = true orf.push t_code orf.push t_status good_orfs_minus.push orf else one_good_orf_minus = true orf.push t_code #orf[5] orf.push t_status #orf[6] good_orfs_minus.push orf end elsif (orf[3].to_i > 0) if (one_good_orf_plus == true) more_than_one_plus = true orf.push t_code orf.push t_status good_orfs_plus.push orf else one_good_orf_plus = true orf.push t_code #orf[5] orf.push t_status #orf[6] good_orfs_plus.push orf end end end # if (t_code.to_f > ref_code) # cogemos el de mejor testcode # puts "name: #{name}, orf[0].length: #{orf[0].length}, ref_orf.length: #{ref_orf.length}, t_status: #{t_status}, one_coding: #{one_coding}\n\n" if (orf[0].length > ref_orf.length) and (t_status == 'coding') # cogemos el mayor de los coding # puts "compleeeeeeeeeeeeeeetttttttttttaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ref_code = t_code.to_f ref_name = name ref_orf = orf[0] ref_start = orf[1] ref_end = orf[2] ref_frame = orf[3] one_coding = true (ref_status,ref_msgs) = checking_beginning(orf[4],t_status,orf[1],tc_fasta,orf[2]) elsif (one_coding == false) and (orf[0].length > ref_orf.length) and (t_status == 'putative_coding') # puts "putaaaaaaaaaaaaaaaaaaativeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee" ref_code = t_code.to_f ref_name = name ref_orf = orf[0] ref_start = orf[1] ref_end = orf[2] ref_frame = orf[3] one_putative = true (ref_status,ref_msgs) = checking_beginning(orf[4],t_status,orf[1],tc_fasta,orf[2]) elsif (one_coding == false) and (one_putative == false) # puts "unknoooooooooooooooooooooooooooooooooooooooooooooooooooooooooooown" ref_status = t_status ref_code = t_code.to_f ref_name = name ref_orf = orf[0] ref_start = orf[1] ref_end = orf[2] ref_frame = orf[3] ref_msgs = 'Bad testcode score' # ref_msgs = 'test code did not find a coding region in your sequence' end # puts "**name: #{name},ref_status: #{ref_status}, ref_code: #{ref_code}, length: #{tc_fasta.length}, ref_start: #{ref_start}, ref_end: #{ref_end}, ref_frame: #{ref_frame}\n\n" # puts "t_code: #{t_code}, t_status: #{t_status}" end end # vamos a preparar el mejor ORF y devolverlo (ref_start,ref_end) = corrige_frame(ref_frame,ref_start,ref_end) # puts "**name: #{name}, ref_start: #{ref_start}, ref_end: #{ref_end}, ref_frame: #{ref_frame}\n\n" # si encontramos más de un orf valido if (more_than_one_plus) or (more_than_one_minus) if (ref_frame > 0) if (more_than_one_plus) (ref_status,ref_code,ref_name,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) = orf_fusion(good_orfs_plus,tc_fasta,name,ref_status,ref_code,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) # if (ref_frame == 0) # ref_frame = 7 # end # puts "merging ORFs with orf_fusion!!!!" end elsif (ref_frame < 0) if (more_than_one_minus) (ref_status,ref_code,ref_name,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) = orf_fusion(good_orfs_minus,tc_fasta,name,ref_status,ref_code,ref_start,ref_end,ref_frame,ref_orf,ref_msgs) # if (ref_frame == 0) # ref_frame = -7 # end # puts "merging ORFs with orf_fusion!!!!" end end end # puts "name: #{name},t_status: #{ref_status}, t_code: #{ref_code}, frame: #{ref_frame},start: #{ref_start}, end: #{ref_end}\n\n" if (ref_frame < 0) (kk1,kk2,ref_start2,ref_end2) = reverse_seq(tc_fasta,ref_frame,ref_start,ref_end) ref_start_ok = "#{ref_start2} (-#{ref_start})" ref_end_ok = "#{ref_end2} (-#{ref_end})" else ref_start_ok = ref_start ref_end_ok = ref_end end end return [ref_name,ref_code,ref_status,ref_start_ok,ref_end_ok,ref_frame,ref_orf,ref_msgs] end # no se usa # gnuplot must be installed def window_walking(seq) tcode_array = [] y1=[] y2=[] s = seq.seq_fasta.upcase (s.length-200).times do |i| (t_code,t_status)=testCode_exec(s[i..i+199]) y1.push t_code end tcode_array_rc = [] src = s.complementary_dna (src.length-200).times do |i| (t_code,t_status)=testCode_exec(src[i..i+199]) # puts "#{i}-#{i+199}" # puts t_status # puts t_code puts src[i..i+199] y2.push t_code end # Create lines plot p=ScbiPlot::Lines.new('lines.png','title') x=(1..src.length-200).entries p.add_x(x) # puts "x_length: #{src.length-200} #{x}" # puts "x: #{x.length}, y: #{y1.length}" p.add_series('serie0', y1) p.do_graph end # no se usa # para escoger la region codificante mas grande, incluso solapando verios frames def compare_regions_old(this_strand,tc_fasta) # array_regions = [] # array_regions.push this_strand.first.dup # # this_strand.each do |one_orf| # # prev_orf = array_regions.last.dup # taking last orf before add actual orf # array_regions.push one_orf.dup # add actual orf, el primer orf me va a salir duplicado # # if prev_orf.overlaps?(one_orf) # prev_orf.seq = tc_fasta[prev_orf.t_start..one_orf.t_end] # # (t_code,t_status)=testCode_exec(prev_orf.seq) # # prev_orf.score = t_code # prev_orf.status = t_status # # #t_start, frame and stop_codon are the same # prev_orf.t_end = one_orf.t_end # if (prev_orf.type != one_orf.type) # prev_orf.type = type_fusion(prev_orf.type,one_orf.type) # end # prev_orf.more_than_one_frame = true # # # puts "overlaps:" # # puts "#{one_orf.t_start} - #{one_orf.t_end}" # # puts "#{array_regions.last.t_start} - #{array_regions.last.t_end}" # # array_regions.push prev_orf # add overlapped orf, # end # # end # # # select the largest ORF without frame repair-------------------------------- hay que comentar todo el bloque de arriba # # best_orf = nil # # best_orf = this_strand.sort{|x,y| y.seq.length <=> x.seq.length }[0] # # --------------------------------------------------------------------------- # # # select the largest ORF ---------------------------------------------------- # # best_orf = nil # # best_orf = array_regions.sort{|x,y| y.seq.length <=> x.seq.length }[0] # # if !best_orf.nil? # # puts "best_orf.status: #{best_orf.status}, best_orf.type: #{best_orf.type}" # # end # # # # --------------------------------------------------------------------------- # # # select the largest coding ORF --------------------------------------------- # best_orf = nil # array_regions.sort{|x,y| y.seq.length <=> x.seq.length }.each do |one_orf| # # puts "#{one_orf.status}, #{one_orf.type}" # if (one_orf.status == :coding) # best_orf = one_orf # break # end # end # # --------------------------------------------------------------------------- # # # puts "\n\n\nstart" # # array_regions.sort{|x,y| y.seq.length <=> x.seq.length }.each do |one_orf| # # puts one_orf.seq.length # # end # # puts "end\n\n\n" # # puts "best_orf: #{best_orf.seq.length}" # # # if !best_orf.nil? # # best_orf.type = best_orf.status # # end # # return best_orf end ###### test code program functions ######### def testCode_exec (sequence) sequence.downcase! basesOne = [0,0,0,0]; basesTwo = [0,0,0,0]; basesThree = [0,0,0,0]; #for (j = 0; j < sequence.length; j = j + 3) 0.step(sequence.length-1,3) do |j| if (sequence[j].chr == "g") basesOne[0] = basesOne[0] + 1; elsif (sequence[j].chr == "a") basesOne[1] = basesOne[1] + 1; elsif (sequence[j].chr == "t") basesOne[2] = basesOne[2] + 1; elsif (sequence[j].chr == "c") basesOne[3] = basesOne[3] + 1; else end end #for (j = 1; j < sequence.length; j = j + 3) 1.step(sequence.length-1,3) do |j| if (sequence[j].chr == "g") basesTwo[0] = basesTwo[0] + 1; elsif (sequence[j].chr == "a") basesTwo[1] = basesTwo[1] + 1; elsif (sequence[j].chr == "t") basesTwo[2] = basesTwo[2] + 1; elsif (sequence[j].chr == "c") basesTwo[3] = basesTwo[3] + 1; else end end #for (j = 2; j < sequence.length; j = j + 3) 2.step(sequence.length-1,3) do |j| if (sequence[j].chr == "g") basesThree[0] = basesThree[0] + 1; elsif (sequence[j].chr == "a") basesThree[1] = basesThree[1] + 1; elsif (sequence[j].chr == "t") basesThree[2] = basesThree[2] + 1; elsif (sequence[j].chr == "c") basesThree[3] = basesThree[3] + 1; else end end paramG = calcParam(basesOne[0],basesTwo[0],basesThree[0]); contentG = countBases(basesOne[0],basesTwo[0],basesThree[0]) / sequence.length.to_f; posProbG = usePosParam(paramG,"g"); contProbG = useContParam(contentG,"g"); paramA = calcParam(basesOne[1],basesTwo[1],basesThree[1]); contentA = countBases(basesOne[1],basesTwo[1],basesThree[1]) / sequence.length.to_f; posProbA = usePosParam(paramA,"a"); contProbA = useContParam(contentA,"a"); paramT = calcParam(basesOne[2],basesTwo[2],basesThree[2]); contentT = countBases(basesOne[2],basesTwo[2],basesThree[2]) / sequence.length.to_f; posProbT = usePosParam(paramT,"t"); contProbT = useContParam(contentT,"t"); paramC = calcParam(basesOne[3],basesTwo[3],basesThree[3]); contentC = countBases(basesOne[3],basesTwo[3],basesThree[3]) / sequence.length.to_f; posProbC = usePosParam(paramC,"c"); contProbC = useContParam(contentC,"c"); valueY = posProbG * 0.31 + contProbG * 0.15 + posProbA * 0.26 + contProbA * 0.11 + posProbT * 0.33 + contProbT * 0.14 + posProbC * 0.18 + contProbC * 0.12; valueY = ((valueY*1000.0).round/1000.0); # return 'The TestCode value is ' + valueY.to_s + ', which indicates that the sequence ' + getConclusion(valueY) + '.'; return [valueY.to_s, getConclusion(valueY)] end def calcParam (valueOne,valueTwo,valueThree) paramArray = [valueOne,valueTwo,valueThree]; paramArray = paramArray.sort#{|a,b| return a-b}#(compareNumbers); paramValue = paramArray[2] / (paramArray[0] + 1.0); # puts paramArray.to_json return paramValue; end def countBases (valueOne,valueTwo,valueThree) return valueOne + valueTwo + valueThree; end def usePosParam (paramValue,base) arrayOfCodingProb = []; codeProb = 0; if (base == "g") arrayOfCodingProb = [0.08,0.08,0.16,0.27,0.48,0.53,0.64,0.74,0.88,0.90] elsif (base == "a") arrayOfCodingProb = [0.22,0.20,0.34,0.45,0.68,0.58,0.93,0.84,0.68,0.94] elsif (base == "t") arrayOfCodingProb = [0.09,0.09,0.20,0.54,0.44,0.69,0.68,0.91,0.97,0.97] elsif (base == "c") arrayOfCodingProb = [0.23,0.30,0.33,0.51,0.48,0.66,0.81,0.70,0.70,0.80] end if (paramValue >= 0 and paramValue < 1.1) codeProb = arrayOfCodingProb[0]; elsif (paramValue >=1.1 and paramValue < 1.2) codeProb = arrayOfCodingProb[1]; elsif (paramValue >=1.2 and paramValue < 1.3) codeProb = arrayOfCodingProb[2]; elsif (paramValue >=1.3 and paramValue < 1.4) codeProb = arrayOfCodingProb[3]; elsif (paramValue >=1.4 and paramValue < 1.5) codeProb = arrayOfCodingProb[4]; elsif (paramValue >=1.5 and paramValue < 1.6) codeProb = arrayOfCodingProb[5]; elsif (paramValue >=1.6 and paramValue < 1.7) codeProb = arrayOfCodingProb[6]; elsif (paramValue >=1.7 and paramValue < 1.8) codeProb = arrayOfCodingProb[7]; elsif (paramValue >=1.8 and paramValue < 1.9) codeProb = arrayOfCodingProb[8]; elsif (paramValue >=1.9) codeProb = arrayOfCodingProb[9]; end return codeProb; end def useContParam (paramValue,base) arrayOfCodingProb = []; codeProb = 0; if (base == "g") arrayOfCodingProb = [0.29,0.33,0.41,0.41,0.73,0.64,0.64,0.47,0.54,0.40] elsif (base == "a") arrayOfCodingProb = [0.21,0.81,0.65,0.67,0.49,0.62,0.55,0.44,0.49,0.28] elsif (base == "t") arrayOfCodingProb = [0.58,0.51,0.69,0.56,0.75,0.55,0.40,0.39,0.24,0.28] elsif (base == "c") arrayOfCodingProb = [0.31,0.39,0.44,0.43,0.59,0.59,0.64,0.51,0.64,0.82] end if (paramValue >= 0 and paramValue < 0.17) codeProb = arrayOfCodingProb[0]; elsif (paramValue >=0.17 and paramValue < 0.19) codeProb = arrayOfCodingProb[1]; elsif (paramValue >=0.19 and paramValue < 0.21) codeProb = arrayOfCodingProb[2]; elsif (paramValue >=0.21 and paramValue < 0.23) codeProb = arrayOfCodingProb[3]; elsif (paramValue >=0.23 and paramValue < 0.25) codeProb = arrayOfCodingProb[4]; elsif (paramValue >=0.25 and paramValue < 0.27) codeProb = arrayOfCodingProb[5]; elsif (paramValue >=0.27 and paramValue < 0.29) codeProb = arrayOfCodingProb[6]; elsif (paramValue >=0.29 and paramValue < 0.31) codeProb = arrayOfCodingProb[7]; elsif (paramValue >=0.31 and paramValue < 0.33) codeProb = arrayOfCodingProb[8]; elsif (paramValue >=0.33) codeProb = arrayOfCodingProb[9]; end return codeProb; end def getConclusion (testCode_value) codeProb = ""; if (testCode_value < 0.74) codeProb = :unknown; elsif (testCode_value >=0.74 and testCode_value < 0.95) codeProb = :putative_coding; elsif (testCode_value >=0.95) codeProb = :coding; end return codeProb; end end