lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.0 vs lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.1
- old
+ new
@@ -3,10 +3,84 @@
require 'bio'
module Bio
class Util
+<<<<<<< HEAD
+ class MrnaModel < Bio::GFF::GFF3::Record
+ attr_accessor :seq, :cds
+ def initialize(gff_line)
+ super gff_line
+ @cds = []
+ end
+
+ def includes?(seq, point)
+ return true if self.seqname == seq and point.to_i >= self.cds_start and point.to_i <= self.cds_end
+ false
+ end
+
+ def cds_start
+ @cds.flatten.min
+ end
+
+ def cds_end
+ @cds.flatten.max
+ end
+
+ def get_nt_number_in_cds(point)
+ to_count = @cds.sort.select {|a| a.first <= point}
+ in_block = to_count.pop
+ distance_in = to_count.inject(1) {|tot, b| tot + ( b.last - b.first) + 1 }
+ overhang = point - in_block.first
+ left_section = distance_in + overhang
+
+ if self.strand == '-'
+ length = @cds.sort.inject(0) {|tot, b| tot + ( b.last - b.first) + 1 }
+ return length - left_section + 1
+ end
+
+ return left_section
+ end
+
+ def codon_index(dist)
+ (dist - 1) / 3
+ end
+
+ def codon_position(dist)
+ (dist - 1) % 3
+ end
+
+ def codon_array
+ codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b}
+ codon_array
+ end
+
+ def nt
+ end
+
+ ##returns codon and position of nucleotide
+ def codon_and_index(point)
+ distance_into_cds = get_nt_number_in_cds point
+ codon_idx = codon_index distance_into_cds
+ codon_list = codon_array
+ codon = codon_list[codon_idx]
+ pos = codon_position(distance_into_cds)
+ [codon,pos]
+ end
+
+ def substitution_info(point,alt)
+ codon, position = codon_and_index(point)
+ new_codon = codon.dup
+ new_codon[position] = alt.downcase
+
+ a = Bio::Sequence::NA.new(codon).translate.codes.first
+ b = Bio::Sequence::NA.new(new_codon).translate.codes.first
+ sub_type = a == b ? "SYN" : "NON_SYN"
+ return {#:id => self.gffid,
+ :chr => self.seqname,
+ :strand => self.strand,
+=======
class MrnaModel
attr_accessor :seqname, :gff_id, :strand, :cds, :sequences
def initialize(chr, id, strand, cds_arr, seq_arr)
@seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr
@@ -46,31 +120,79 @@
b = Bio::Sequence::NA.new(new_codon).translate.codes.first
sub_type = a == b ? "SYN" : "NON_SYN"
return {:id => @gff_id,
:chr => @seqname,
:strand => @strand,
+>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
:position => point,
:original_codon => codon,
:original_residue => a || 'stop',
:mutant_codon => new_codon,
:mutant_residue =>b || 'stop',
+<<<<<<< HEAD
+ :position_in_codon => position + 1,
+ :substitution_type => sub_type
+ }
+=======
:position_in_codon => position_in_codon + 1,
:substitution_type => sub_type
}
end
running_total += (stop - start)
running_total += 1 if @strand == '-' #how far we are into the cds
end
+>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
end
end#class end
class SynReport
#attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash
def initialize(opts)
+<<<<<<< HEAD
+ cdses = []
+ mrna_list = []
+ seqs = Hash.new
+
+ Bio::FastaFormat.open(opts[:fasta]).each { |seq| seqs[seq.entry_id] = seq.to_seq }
+ $stderr.puts "Loaded Seq..." if opts[:verbose]
+
+
+ @mrnas = Hash.new {|h,k| h[k] = Hash.new}
+ File.open(opts[:gff], "r").each do |gffline|
+ record = Bio::GFF::GFF3::Record.new(gffline)
+ if record.feature_type == 'mRNA'
+ mrna_list << Bio::Util::MrnaModel.new(gffline)
+ elsif record.feature_type =='CDS'
+ cdses << record
+ end
+ end
+
+ mrna_list.each do |mrna|
+ mrna_id = mrna.get_attributes("ID")
+ $stderr.puts "No ID for #{cds}" if mrna_id.empty?
+ mrna_id = mrna_id.first
+ @mrnas[mrna.seqname][mrna_id] = mrna
+ @mrnas[mrna.seqname][mrna_id].seq = seqs[mrna_id].seq
+ end
+
+ cdses.each do |cds|
+ cds_parent = cds.get_attributes("Parent")
+ $stderr.puts "No Parent for #{cds}" if cds_parent.empty?
+ cds_parent = cds_parent.first
+ @mrnas[cds.seqname][cds_parent].cds << [cds.start,cds.end]
+ end
+ $stderr.puts "Loaded GFF..." if opts[:verbose]
+
+
+ end#init end
+
+ def is_in_cds?(chr,point)
+ self.mutation_info(chr,point,"a") ? true : false
+=======
@gene_array = []
@cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
@mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
File.open(opts[:gff], "r").each do |gffline|
record=Bio::GFF::GFF3::Record.new(gffline)
@@ -122,20 +244,38 @@
$stderr.puts "Models built..." if opts[:verbose]
end#init end
def is_in_cds?(chr,point)
@self.mutation_info(chr,point) ? true : false
+>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
end
#returns mutation info if point in CDS, if not in CDS returns false
def mutation_info(chr,pos,alt)
+<<<<<<< HEAD
+ pos = pos.to_i
+ #cant do indels ...
+ return nil if alt.length > 1
+ begin
+ @mrnas[chr].each_pair do |mrna_id, mrna|
+ if mrna.includes?(chr,pos)
+ return mrna.substitution_info(pos,alt)
+ end
+ end
+ false
+ rescue
+ #somthing unpredicatable went wrong and we couldnt do the conversion ...
+ return nil
+ end
+=======
@models[chr].each do |m|
if m.includes?(chr,pos)
return m.substitution_info(chr,pos,alt)
end
end
false
+>>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
end
end#class end
end#class util end
\ No newline at end of file