lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.0 vs lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.1

- old
+ new

@@ -3,10 +3,84 @@ require 'bio' module Bio class Util +<<<<<<< HEAD + class MrnaModel < Bio::GFF::GFF3::Record + attr_accessor :seq, :cds + def initialize(gff_line) + super gff_line + @cds = [] + end + + def includes?(seq, point) + return true if self.seqname == seq and point.to_i >= self.cds_start and point.to_i <= self.cds_end + false + end + + def cds_start + @cds.flatten.min + end + + def cds_end + @cds.flatten.max + end + + def get_nt_number_in_cds(point) + to_count = @cds.sort.select {|a| a.first <= point} + in_block = to_count.pop + distance_in = to_count.inject(1) {|tot, b| tot + ( b.last - b.first) + 1 } + overhang = point - in_block.first + left_section = distance_in + overhang + + if self.strand == '-' + length = @cds.sort.inject(0) {|tot, b| tot + ( b.last - b.first) + 1 } + return length - left_section + 1 + end + + return left_section + end + + def codon_index(dist) + (dist - 1) / 3 + end + + def codon_position(dist) + (dist - 1) % 3 + end + + def codon_array + codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b} + codon_array + end + + def nt + end + + ##returns codon and position of nucleotide + def codon_and_index(point) + distance_into_cds = get_nt_number_in_cds point + codon_idx = codon_index distance_into_cds + codon_list = codon_array + codon = codon_list[codon_idx] + pos = codon_position(distance_into_cds) + [codon,pos] + end + + def substitution_info(point,alt) + codon, position = codon_and_index(point) + new_codon = codon.dup + new_codon[position] = alt.downcase + + a = Bio::Sequence::NA.new(codon).translate.codes.first + b = Bio::Sequence::NA.new(new_codon).translate.codes.first + sub_type = a == b ? "SYN" : "NON_SYN" + return {#:id => self.gffid, + :chr => self.seqname, + :strand => self.strand, +======= class MrnaModel attr_accessor :seqname, :gff_id, :strand, :cds, :sequences def initialize(chr, id, strand, cds_arr, seq_arr) @seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr @@ -46,31 +120,79 @@ b = Bio::Sequence::NA.new(new_codon).translate.codes.first sub_type = a == b ? "SYN" : "NON_SYN" return {:id => @gff_id, :chr => @seqname, :strand => @strand, +>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af :position => point, :original_codon => codon, :original_residue => a || 'stop', :mutant_codon => new_codon, :mutant_residue =>b || 'stop', +<<<<<<< HEAD + :position_in_codon => position + 1, + :substitution_type => sub_type + } +======= :position_in_codon => position_in_codon + 1, :substitution_type => sub_type } end running_total += (stop - start) running_total += 1 if @strand == '-' #how far we are into the cds end +>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af end end#class end class SynReport #attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash def initialize(opts) +<<<<<<< HEAD + cdses = [] + mrna_list = [] + seqs = Hash.new + + Bio::FastaFormat.open(opts[:fasta]).each { |seq| seqs[seq.entry_id] = seq.to_seq } + $stderr.puts "Loaded Seq..." if opts[:verbose] + + + @mrnas = Hash.new {|h,k| h[k] = Hash.new} + File.open(opts[:gff], "r").each do |gffline| + record = Bio::GFF::GFF3::Record.new(gffline) + if record.feature_type == 'mRNA' + mrna_list << Bio::Util::MrnaModel.new(gffline) + elsif record.feature_type =='CDS' + cdses << record + end + end + + mrna_list.each do |mrna| + mrna_id = mrna.get_attributes("ID") + $stderr.puts "No ID for #{cds}" if mrna_id.empty? + mrna_id = mrna_id.first + @mrnas[mrna.seqname][mrna_id] = mrna + @mrnas[mrna.seqname][mrna_id].seq = seqs[mrna_id].seq + end + + cdses.each do |cds| + cds_parent = cds.get_attributes("Parent") + $stderr.puts "No Parent for #{cds}" if cds_parent.empty? + cds_parent = cds_parent.first + @mrnas[cds.seqname][cds_parent].cds << [cds.start,cds.end] + end + $stderr.puts "Loaded GFF..." if opts[:verbose] + + + end#init end + + def is_in_cds?(chr,point) + self.mutation_info(chr,point,"a") ? true : false +======= @gene_array = [] @cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } } @mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } } File.open(opts[:gff], "r").each do |gffline| record=Bio::GFF::GFF3::Record.new(gffline) @@ -122,20 +244,38 @@ $stderr.puts "Models built..." if opts[:verbose] end#init end def is_in_cds?(chr,point) @self.mutation_info(chr,point) ? true : false +>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af end #returns mutation info if point in CDS, if not in CDS returns false def mutation_info(chr,pos,alt) +<<<<<<< HEAD + pos = pos.to_i + #cant do indels ... + return nil if alt.length > 1 + begin + @mrnas[chr].each_pair do |mrna_id, mrna| + if mrna.includes?(chr,pos) + return mrna.substitution_info(pos,alt) + end + end + false + rescue + #somthing unpredicatable went wrong and we couldnt do the conversion ... + return nil + end +======= @models[chr].each do |m| if m.includes?(chr,pos) return m.substitution_info(chr,pos,alt) end end false +>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af end end#class end end#class util end \ No newline at end of file