class Bio::DB::Pileup

Attributes

ar1[RW]
ar2[RW]
ar3[RW]
consensus[RW]
consensus_quality[RW]
coverage[RW]
indel_1[RW]
indel_2[RW]
pos[RW]
read_bases[RW]
read_quals[RW]
ref_base[RW]
ref_name[RW]
rms_mapq[RW]
snp_quality[RW]

Public Class Methods

iupac_to_base(alt_base) click to toggle source

returns the two bases for the corresponding iupac code

# File lib/bio/db/pileup.rb, line 177
def Pileup.iupac_to_base(alt_base)
    case alt_base
          when 'K' then ['G','T']
          when 'M' then ['A','C']
          when 'S' then ['C','G']
          when 'R' then ['A','G']
          when 'W' then ['A','T']
          when 'Y' then ['C','T']
          else alt_base.split(//)
    end
end
new(pile_up_line) click to toggle source

creates the Pileup object

pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
pile = Bio::DB::Pileup.new(pile_up_line)
# File lib/bio/db/pileup.rb, line 35
def initialize(pile_up_line)
  cols = pile_up_line.split(/\t/)
  if cols.length == 6 ##should only be able to get 6 lines from mpileup
    @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols
  elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols...
    if cols[2] == '*' #indel
      @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @indel_1, @indel_2, @ar1, @ar2, @ar3 = cols
    else #snp / identity
      @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals = cols
    end
    @consensus_quality = @consensus_quality.to_f
    @snp_quality = @snp_quality.to_f
    @rms_mapq = @rms_mapq.to_f
  else
    #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed"
  end
    
  @pos = @pos.to_i
  @coverage = @coverage.to_f
  @ref_count = nil
  @non_ref_count_hash = nil 
  @non_ref_count = nil
end

Public Instance Methods

genotype_list() click to toggle source

identifies the reference base and returns the indel or snp genotype as applicable

# File lib/bio/db/pileup.rb, line 167
def genotype_list
  if self.ref_base == '*'
    return indel_gt
  else
   return snp_gt
  end
end
non_ref_count() click to toggle source

returns the total non-reference bases in the reads at this position

# File lib/bio/db/pileup.rb, line 69
def non_ref_count
  if @non_ref_count.nil?
    @non_ref_count = @read_bases.count("ATGCatgc").to_f
  end
  @non_ref_count
end
non_refs() click to toggle source

Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts returns a hash pile.non_refs #{:A => 1, :C => 0, :T => 0, :G => 0}

# File lib/bio/db/pileup.rb, line 61
def non_refs
  if @non_ref_count_hash.nil?
     @non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")}
  end
    @non_ref_count_hash
end
parse_indel(alt) click to toggle source

identifies if the indel is an insertion or a deletion

# File lib/bio/db/pileup.rb, line 190
def parse_indel(alt)
  return "D#{$'.length}" if alt =~/^-/ 
  if alt=~/^\+/
    return "I#{$'}"
  elsif alt == '*' 
    return nil
  end
end
ref_count() click to toggle source

returns the count of reference-bases in the reads at this position

# File lib/bio/db/pileup.rb, line 77
def ref_count
  if @ref_count.nil?
    @ref_count = self.read_bases.count(".,")
  end
  @ref_count
end
to_s() click to toggle source

returns pileup format line

# File lib/bio/db/pileup.rb, line 201
def to_s
  if @read_quals and !@consensus_quality #6col
    [@ref_name, @pos, @ref_base, @coverage.to_i, @read_bases, @read_quals].join("\t")    
  elsif @indel_1 #13 cols
    [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @indel_1, @indel_2, @ar1, @ar2, @ar3].join("\t")
  else #10 cols
    [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @read_bases, @read_quals].join("\t")
  end
    
end
to_vcf() click to toggle source

returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a '*' instead of the reference allele

# File lib/bio/db/pileup.rb, line 103
def to_vcf
  alt,g = self.genotype_list 
  alt = self.consensus.split(//).join(',') unless self.ref_base == '*'
  alt = '.' if alt == self.ref_base
  [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t")
end