lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.1 vs lib/bio/utils/bio-synreport.rb in bio-synreport-0.1.2
- old
+ new
@@ -3,11 +3,10 @@
require 'bio'
module Bio
class Util
-<<<<<<< HEAD
class MrnaModel < Bio::GFF::GFF3::Record
attr_accessor :seq, :cds
def initialize(gff_line)
super gff_line
@cds = []
@@ -76,83 +75,28 @@
b = Bio::Sequence::NA.new(new_codon).translate.codes.first
sub_type = a == b ? "SYN" : "NON_SYN"
return {#:id => self.gffid,
:chr => self.seqname,
:strand => self.strand,
-=======
- class MrnaModel
- attr_accessor :seqname, :gff_id, :strand, :cds, :sequences
-
- def initialize(chr, id, strand, cds_arr, seq_arr)
- @seqname, @gff_id, @strand, @cds, @sequences = chr, id, strand, cds_arr, seq_arr
- end
-
- def includes?(seq, point)
- @cds.each {|start, stop| return true if @seqname == seq and point.to_i >= start and point.to_i <= stop}
- false
- end
-
- def seq
- @sequences.join
- end
-
- def substitution_info(chr,point,alt)
- cds_start = @cds.first.first
- running_total = 0
- @cds.each do |start,stop|
- if point.to_i >= start and point.to_i <= stop
- offset = case @strand
- when "+"
- #offset =
- (point.to_i - start) + running_total
- when "-"
- (stop - point.to_i) + running_total
- end #offset = how far into cds SNP is
- codon_number = offset / 3
- position_in_codon = offset % 3
- #pp [offset, codon_number, position_in_codon]
- codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b}
- codon = codon_array[codon_number]
- nt = codon[position_in_codon]
- new_codon = codon.dup
- new_codon[position_in_codon] = alt.downcase
- #pp [codon, position_in_codon, nt, new_codon]
- a = Bio::Sequence::NA.new(codon).translate.codes.first
- b = Bio::Sequence::NA.new(new_codon).translate.codes.first
- sub_type = a == b ? "SYN" : "NON_SYN"
- return {:id => @gff_id,
- :chr => @seqname,
- :strand => @strand,
->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
:position => point,
:original_codon => codon,
:original_residue => a || 'stop',
:mutant_codon => new_codon,
:mutant_residue =>b || 'stop',
-<<<<<<< HEAD
:position_in_codon => position + 1,
:substitution_type => sub_type
}
-=======
- :position_in_codon => position_in_codon + 1,
- :substitution_type => sub_type
- }
- end
- running_total += (stop - start)
- running_total += 1 if @strand == '-' #how far we are into the cds
- end
->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
+
end
end#class end
class SynReport
#attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash
def initialize(opts)
-<<<<<<< HEAD
cdses = []
mrna_list = []
seqs = Hash.new
Bio::FastaFormat.open(opts[:fasta]).each { |seq| seqs[seq.entry_id] = seq.to_seq }
@@ -188,72 +132,14 @@
end#init end
def is_in_cds?(chr,point)
self.mutation_info(chr,point,"a") ? true : false
-=======
- @gene_array = []
- @cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
- @mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
- File.open(opts[:gff], "r").each do |gffline|
- record=Bio::GFF::GFF3::Record.new(gffline)
- if(record.feature_type == 'gene')
- @gene_array << [record.seqname, record.id]
- elsif(record.feature_type == 'CDS' or record.feature_type == 'mRNA')
- parents = record.get_attributes('Parent')
- parents.each do |parent|
- if record.feature_type == 'CDS'
- @cdshash[record.seqname][parent] << record
- else
- @mRNAhash[record.seqname][parent] << record
- end
- end
- end
end
- $stderr.puts "Loaded GFF..." if opts[:verbose]
- @seqhash = {}
- Bio::FastaFormat.open(opts[:fasta]).each { |seq| @seqhash[seq.entry_id] = seq.to_seq }
- $stderr.puts "Loaded Seq..." if opts[:verbose]
- @models = Hash.new {|h,k| h[k] = [] }
- $stderr.puts "Building models..." if opts[:verbose]
- @gene_array.each do |gene|
-
- mRNAs=@mRNAhash[gene.first][gene.last]
- mRNAs.each do |mRNA|
- next if @seqhash[gene.first].nil?
- cdsa = []
- seqs = []
- cdsary=@cdshash[gene.first][mRNA.id]
- cdsary.each {|c| cdsa << [c.start, c.end]}
- cdsa.sort!
- cdsa.reverse! if mRNA.strand == '-'
-
- cdsa.each do |cds|
-
- #cdsa << [cds.start, cds.end]
- if mRNA.strand == '+'
- seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") )
- elsif mRNA.strand == "-"
- seqs << Bio::Sequence::NA.new(@seqhash[mRNA.seqname].splicing("#{cds.first}..#{cds.last}") ).complement
- end
- end
- @models[mRNA.seqname] << Bio::Util::MrnaModel.new(mRNA.seqname, mRNA.id, mRNA.strand, cdsa, seqs )
- #pp @models[mRNA.seqname][-1].cds if mRNA.id == 'AT2G17530.1' or mRNA.id == 'AT2G17550.1'
- end
- end
- $stderr.puts "Models built..." if opts[:verbose]
- end#init end
-
- def is_in_cds?(chr,point)
- @self.mutation_info(chr,point) ? true : false
->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
- end
-
#returns mutation info if point in CDS, if not in CDS returns false
def mutation_info(chr,pos,alt)
-<<<<<<< HEAD
pos = pos.to_i
#cant do indels ...
return nil if alt.length > 1
begin
@mrnas[chr].each_pair do |mrna_id, mrna|
@@ -264,18 +150,9 @@
false
rescue
#somthing unpredicatable went wrong and we couldnt do the conversion ...
return nil
end
-=======
-
- @models[chr].each do |m|
- if m.includes?(chr,pos)
- return m.substitution_info(chr,pos,alt)
- end
- end
- false
->>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
end
end#class end
end#class util end
\ No newline at end of file