lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.1 vs lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.2

- old
+ new

@@ -3,11 +3,10 @@ require 'bio' module Bio class Util -<<<<<<< HEAD class MrnaModel < Bio::GFF::GFF3::Record attr_accessor :seq, :cds def initialize(gff_line) super gff_line @cds = [] @@ -76,83 +75,28 @@ b = Bio::Sequence::NA.new(new_codon).translate.codes.first sub_type = a == b ? "SYN" : "NON_SYN" return {#:id => self.gffid, :chr => self.seqname, :strand => self.strand, -======= - class MrnaModel - attr_accessor :seqname, :gff_id, :strand, :cds, :sequences - - def initialize(chr, id, strand, cds_arr, seq_arr) - @seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr - end - - def includes?(seq, point) - @cds.each {|start, stop| return true if @seqname == seq and point.to_i >= start and point.to_i <= stop} - false - end - - def seq - @sequences.join - end - - def substitution_info(chr,point,alt) - cds_start = @cds.first.first - running_total = 0 - @cds.each do |start,stop| - if point.to_i >= start and point.to_i <= stop - offset = case @strand - when "+" - #offset = - (point.to_i - start) + running_total - when "-" - (stop - point.to_i) + running_total - end #offset = how far into cds SNP is - codon_number = offset / 3 - position_in_codon = offset % 3 - #pp [offset, codon_number, position_in_codon] - codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b} - codon = codon_array[codon_number] - nt = codon[position_in_codon] - new_codon = codon.dup - new_codon[position_in_codon] = alt.downcase - #pp [codon, position_in_codon, nt, new_codon] - a = Bio::Sequence::NA.new(codon).translate.codes.first - b = Bio::Sequence::NA.new(new_codon).translate.codes.first - sub_type = a == b ? "SYN" : "NON_SYN" - return {:id => @gff_id, - :chr => @seqname, - :strand => @strand, ->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af :position => point, :original_codon => codon, :original_residue => a || 'stop', :mutant_codon => new_codon, :mutant_residue =>b || 'stop', -<<<<<<< HEAD :position_in_codon => position + 1, :substitution_type => sub_type } -======= - :position_in_codon => position_in_codon + 1, - :substitution_type => sub_type - } - end - running_total += (stop - start) - running_total += 1 if @strand == '-' #how far we are into the cds - end ->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af + end end#class end class SynReport #attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash def initialize(opts) -<<<<<<< HEAD cdses = [] mrna_list = [] seqs = Hash.new Bio::FastaFormat.open(opts[:fasta]).each { |seq| seqs[seq.entry_id] = seq.to_seq } @@ -188,72 +132,14 @@ end#init end def is_in_cds?(chr,point) self.mutation_info(chr,point,"a") ? true : false -======= - @gene_array = [] - @cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } } - @mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } } - File.open(opts[:gff], "r").each do |gffline| - record=Bio::GFF::GFF3::Record.new(gffline) - if(record.feature_type == 'gene') - @gene_array << [record.seqname, record.id] - elsif(record.feature_type == 'CDS' or record.feature_type == 'mRNA') - parents = record.get_attributes('Parent') - parents.each do |parent| - if record.feature_type == 'CDS' - @cdshash[record.seqname][parent] << record - else - @mRNAhash[record.seqname][parent] << record - end - end - end end - $stderr.puts "Loaded GFF..." if opts[:verbose] - @seqhash = {} - Bio::FastaFormat.open(opts[:fasta]).each { |seq| @seqhash[seq.entry_id] = seq.to_seq } - $stderr.puts "Loaded Seq..." if opts[:verbose] - @models = Hash.new {|h,k| h[k] = [] } - $stderr.puts "Building models..." if opts[:verbose] - @gene_array.each do |gene| - - mRNAs=@mRNAhash[gene.first][gene.last] - mRNAs.each do |mRNA| - next if @seqhash[gene.first].nil? - cdsa = [] - seqs = [] - cdsary=@cdshash[gene.first][mRNA.id] - cdsary.each {|c| cdsa << [c.start, c.end]} - cdsa.sort! - cdsa.reverse! if mRNA.strand == '-' - - cdsa.each do |cds| - - #cdsa << [cds.start, cds.end] - if mRNA.strand == '+' - seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") ) - elsif mRNA.strand == "-" - seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") ).complement - end - end - @models[mRNA.seqname] << Bio::Util::MrnaModel.new(mRNA.seqname, mRNA.id, mRNA.strand, cdsa, seqs ) - #pp @models[mRNA.seqname][-1].cds if mRNA.id == 'AT2G17530.1' or mRNA.id == 'AT2G17550.1' - end - end - $stderr.puts "Models built..." if opts[:verbose] - end#init end - - def is_in_cds?(chr,point) - @self.mutation_info(chr,point) ? true : false ->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af - end - #returns mutation info if point in CDS, if not in CDS returns false def mutation_info(chr,pos,alt) -<<<<<<< HEAD pos = pos.to_i #cant do indels ... return nil if alt.length > 1 begin @mrnas[chr].each_pair do |mrna_id, mrna| @@ -264,18 +150,9 @@ false rescue #somthing unpredicatable went wrong and we couldnt do the conversion ... return nil end -======= - - @models[chr].each do |m| - if m.includes?(chr,pos) - return m.substitution_info(chr,pos,alt) - end - end - false ->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af end end#class end end#class util end \ No newline at end of file