# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # 'Software'), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. require 'scbi_blast' require 'fl_string_utils.rb' OPERATION = 0 QUERY = 1 TARGET = 2 class ExoBlastHit < BlastHit attr_accessor :q_frameshift, :s_frameshift def initialize(start_target, ends_target, start_query, ends_query) super(start_target, ends_target, start_query, ends_query) @s_frameshift=[] @q_frameshift=[] end end # Extracts results from blast table's file and uses it to create instances of "BlastQuery" and "BlastHit" class ExonerateResult # Parser initialization def initialize(input, seqs= nil, query_seqs = nil, all = true) @querys = [] @seqs = seqs #unigenes @prot_seqs = query_seqs#prot if input.is_a?(Array) input.each do |file| parse_file(File.open(file).readlines, all) end else parse_file(File.open(input).readlines, all) end query_name='' end def parse_file(lines, all) lines_parsed=[] lines_parsed={} if !all lines.each do |line| if line=~ /^vulgar:/ line.chomp! fields=line.split(' ', 11) features={'query_id'=> fields[1], 'query_start_align'=> fields[2].to_i, 'query_end_align'=> fields[3].to_i, 'query_strand'=> fields[4],'target_id'=> fields[5], 'target_start_align'=> fields[6].to_i, 'target_end_align'=> fields[7].to_i, 'target_strand'=> fields[8], 'score'=> fields[9].to_i, 'align_data'=> fields[10]} if all lines_parsed << features else if !lines_parsed.key?(features['target_id']) # Añadir valor si no existe lines_parsed[features['target_id']]=features else if features['score']>lines_parsed[features['target_id']]['score'] # Si ya existe una query, ver si la nueva presenta un mayor score y reemplazar la antigua lines_parsed[features['target_id']]=features end end end end end convert_parsed_lines(lines_parsed) end def convert_parsed_lines(lines_parsed) last_query = nil query = nil lines_parsed.each_with_index do |line| begin if lines_parsed.class.to_s=='Array' align_data=line['align_data'] features=line else #hash align_data=line[1]['align_data'] features=line[1] end tags = align_data.scan(/([MFG53S]) ([0-9]+) ([0-9]+)/) tags.map!{|tag| [tag[0], tag[1].to_i, tag[2].to_i]} if features['target_id'] != last_query last_query = features['target_id'] query = BlastQuery.new(features['target_id']) @querys << query end hiting(features,tags, query) rescue puts "Result: #{features['target_id']} => #{features['query_id']} hasn't been parsed\n#{line}" end end end #this method only works fine with --model protein2dna parameter of exonerate def hiting(features, tags, query) #Convierte las coordenadas relativas del exonerate a absolutas tipo blast, definiendo solo los hits do_align = false do_align = true if !@prot_seqs.nil? && !@seqs.nil? start_target = features['target_start_align']#Unigen start_query = features['query_start_align'] #proteina ends_target = features['target_end_align'] ends_query = features['query_end_align']-1 # -> Exonerate don't set to 0 position the ends of target and query if features['target_strand'] == '-' #-> Exonerate don't set to 0 position the ends of target and query start_target -= 1 # Start target is end target when mathc is in reversed complementary strand else ends_target -= 1 end hit = ExoBlastHit.new(start_target+1, ends_target+1, start_query+1, ends_query+1) define_hit_parameters(hit, features, tags) query.add_hit(hit) #Define alignment and blast like parameters target_alignment = '' query_alignment = '' counter_target = start_target counter_query = start_query if do_align #get seqs query_seq = @prot_seqs[features['query_id']] target_seq = @seqs[features['target_id']] end counter_target, target_seq = do_reverso_complementary(counter_target, target_seq) if features['target_strand'] == '-' query_frameshift = [] target_frameshift = [] gap_shift = 0 #puts features['query_id']+ ' ' +features['target_strand'], '-----------------------' tags.each_with_index do |tag, n_operation| #puts tag.inspect if do_align gap_shift = 0 if tag[OPERATION] != 'G' query_alignment << query_seq[counter_query, tag[QUERY]] target_alignment << target_seq[counter_target, tag[TARGET]].translate end if tag[OPERATION] == 'F' if tag[TARGET] > 0 && tag[TARGET] < 3 #true FRAMESHIFT gap_shift += 1 if tags[n_operation+1][OPERATION] != 'G' #there are frameshift that not insert a gap, we do it query_alignment << '-' if do_align end else query_alignment << '-' * (tag[TARGET]/3.0).ceil if do_align end query_frameshift << counter_query fs_counter_target = counter_target fs_counter_target = target_seq.length - counter_target if features['target_strand'] == '-' # ESto es un apaño, habria que plantear el parseo de las reversas como reduccion en el contador del formato del exonerate, en vez de como adiccion if tag[TARGET] > 3 real_fs = tag[TARGET]%3 real_gap = tag[TARGET] - real_fs fs = [fs_counter_target + real_gap, real_fs] else fs = [fs_counter_target, tag[TARGET]] end target_frameshift << fs elsif tag[OPERATION] == 'G' query_alignment << '-' * (tag[TARGET]/3.0).ceil if do_align diff = tag[QUERY] - gap_shift target_alignment << '-' * diff if do_align && diff > 0 gap_shift = 0 end counter_query += tag[QUERY] counter_target += tag[TARGET] end hit.s_frameshift = query_frameshift hit.q_frameshift = target_frameshift #puts "\e[33m#{target_alignment}\e[0m", "\e[36m#{query_alignment}\e[0m" if do_align hit.q_seq = target_alignment hit.s_seq = query_alignment hit.align_len = query_alignment.length hit.ident = set_ident(target_alignment,query_alignment) end end #def def do_reverso_complementary(counter_target, target_seq) counter_target = target_seq.length - 1 - counter_target target_seq = target_seq.complementary_dna return counter_target, target_seq end def set_ident(target_alignment, query_alignment) matchs = 0 position = 0 target_alignment.each_char do |char| matchs +=1 if char == query_alignment[position] position +=1 end perc_ident = ('%.2f' % (matchs*100.0/target_alignment.length)).to_f return perc_ident end def define_hit_parameters(hit, features, tags) hit.gaps = 0 tags.map{|aln| hit.gaps += 1 if aln[0] == 'G'} hit.reversed = false hit.align_len =(features['query_end_align'] - features['query_start_align']).abs+1 hit.mismatches=0 hit.e_val=0 hit.bit_score=0 hit.score = features['score'] hit.s_frame = nil strand = 1 strand = -1 if features['target_strand'] == '-' hit.q_frame = (((features['target_start_align']) % 3) +1) *strand hit.subject_id = features['query_id'] hit.full_subject_length=0 hit.definition='' hit.acc=features['query_id'] hit.q_seq='' hit.s_seq='' end # inspect results def inspect res = "Exonerate results:\n" res+= '-'*20 res+= "\nQuerys: #{@querys.count}\n" @querys.each{|q| res+=q.inspect+"\n"} return res end # find query by name def find_query(querys,name_q) # newq = querys.find{|q| ( q.find{|h| (h.subject_id)})} new_q=nil if !querys.empty? new_q=querys.find{|q| (q.query_id==name_q)} end return new_q end # check if there are querys def empty? return @querys.empty? end # get query count def size @querys.size end attr_accessor :querys end