class Bio::DB::Pileup
Attributes
ar1[RW]
ar2[RW]
ar3[RW]
consensus[RW]
consensus_quality[RW]
coverage[RW]
indel_1[RW]
indel_2[RW]
pos[RW]
read_bases[RW]
read_quals[RW]
ref_base[RW]
ref_name[RW]
rms_mapq[RW]
snp_quality[RW]
Public Class Methods
iupac_to_base(alt_base)
click to toggle source
returns the two bases for the corresponding iupac code
# File lib/bio/db/pileup.rb, line 177 def Pileup.iupac_to_base(alt_base) case alt_base when 'K' then ['G','T'] when 'M' then ['A','C'] when 'S' then ['C','G'] when 'R' then ['A','G'] when 'W' then ['A','T'] when 'Y' then ['C','T'] else alt_base.split(//) end end
new(pile_up_line)
click to toggle source
creates the Pileup object
pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<" pile = Bio::DB::Pileup.new(pile_up_line)
# File lib/bio/db/pileup.rb, line 35 def initialize(pile_up_line) cols = pile_up_line.split(/\t/) if cols.length == 6 ##should only be able to get 6 lines from mpileup @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols... if cols[2] == '*' #indel @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @indel_1, @indel_2, @ar1, @ar2, @ar3 = cols else #snp / identity @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals = cols end @consensus_quality = @consensus_quality.to_f @snp_quality = @snp_quality.to_f @rms_mapq = @rms_mapq.to_f else #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed" end @pos = @pos.to_i @coverage = @coverage.to_f @ref_count = nil @non_ref_count_hash = nil @non_ref_count = nil end
Public Instance Methods
genotype_list()
click to toggle source
identifies the reference base and returns the indel or snp genotype as applicable
# File lib/bio/db/pileup.rb, line 167 def genotype_list if self.ref_base == '*' return indel_gt else return snp_gt end end
non_ref_count()
click to toggle source
returns the total non-reference bases in the reads at this position
# File lib/bio/db/pileup.rb, line 69 def non_ref_count if @non_ref_count.nil? @non_ref_count = @read_bases.count("ATGCatgc").to_f end @non_ref_count end
non_refs()
click to toggle source
Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts returns a hash pile.non_refs #{:A => 1, :C => 0, :T => 0, :G => 0}
# File lib/bio/db/pileup.rb, line 61 def non_refs if @non_ref_count_hash.nil? @non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")} end @non_ref_count_hash end
parse_indel(alt)
click to toggle source
identifies if the indel is an insertion or a deletion
# File lib/bio/db/pileup.rb, line 190 def parse_indel(alt) return "D#{$'.length}" if alt =~/^-/ if alt=~/^\+/ return "I#{$'}" elsif alt == '*' return nil end end
ref_count()
click to toggle source
returns the count of reference-bases in the reads at this position
# File lib/bio/db/pileup.rb, line 77 def ref_count if @ref_count.nil? @ref_count = self.read_bases.count(".,") end @ref_count end
to_s()
click to toggle source
returns pileup format line
# File lib/bio/db/pileup.rb, line 201 def to_s if @read_quals and !@consensus_quality #6col [@ref_name, @pos, @ref_base, @coverage.to_i, @read_bases, @read_quals].join("\t") elsif @indel_1 #13 cols [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @indel_1, @indel_2, @ar1, @ar2, @ar3].join("\t") else #10 cols [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @read_bases, @read_quals].join("\t") end end
to_vcf()
click to toggle source
returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a '*' instead of the reference allele
# File lib/bio/db/pileup.rb, line 103 def to_vcf alt,g = self.genotype_list alt = self.consensus.split(//).join(',') unless self.ref_base == '*' alt = '.' if alt == self.ref_base [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t") end