lib/cheripic/contig_pileups.rb in cheripic-1.0.0 vs lib/cheripic/contig_pileups.rb in cheripic-1.1.0
- old
+ new
@@ -2,12 +2,29 @@
require 'bio'
require 'forwardable'
module Cheripic
+ # Custom error handling for ContigPileup class
class ContigPileupsError < CheripicError; end
+ # A ContigPileup object for each contig from assembly that stores
+ # pileup file information and variants are selected from analysis of pileup files
+ # selected variants from pileup files is stored as hashes
+ #
+ # @!attribute [rw] id
+ # @return [String] id of the contig in assembly taken from fasta file
+ # @!attribute [rw] mut_bulk
+ # @return [Hash] a hash of variant positions from mut_bulk as keys and pileup info as values
+ # @!attribute [rw] bg_bulk
+ # @return [Hash] a hash of variant positions from bg_bulk as keys and pileup info as values
+ # @!attribute [rw] mut_parent
+ # @return [Hash] a hash of variant positions from mut_parent as keys and pileup info as values
+ # @!attribute [rw] bg_parent
+ # @return [Hash] a hash of variant positions from bg_parent as keys and pileup info as values
+ # @!attribute [rw] parent_hemi
+ # @return [Hash] a hash of hemi-variant positions as keys and bfr calculated from parent bulks as values
class ContigPileups
include Enumerable
extend Forwardable
def_delegators :@mut_bulk, :each, :each_key, :each_value, :length, :[], :store
@@ -15,25 +32,30 @@
def_delegators :@mut_parent, :each, :each_key, :each_value, :length, :[], :store
def_delegators :@bg_parent, :each, :each_key, :each_value, :length, :[], :store
attr_accessor :id, :parent_hemi
attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent
+ # creates a ContigPileup object using fasta entry id
+ # @param fasta [String] a contig id from fasta entry
def initialize (fasta)
@id = fasta
@mut_bulk = {}
@bg_bulk = {}
@mut_parent = {}
@bg_parent = {}
@parent_hemi = {}
end
+ # bulk pileups are compared and variant positions are selected
+ # @return [Array<Hash>] variant positions are stored in hashes
+ # for homozygous, heterozygous and hemi-variant positions
def bulks_compared
@hm_pos = {}
@ht_pos = {}
@hemi_pos = {}
@mut_bulk.each_key do | pos |
- if Options.params.polyploidy and @parent_hemi.key?(pos)
+ if Options.polyploidy and @parent_hemi.key?(pos)
bg_bases = ''
if @bg_bulk.key?(pos)
bg_bases = @bg_bulk[pos].var_base_frac
end
mut_bases = @mut_bulk[pos].var_base_frac
@@ -44,76 +66,98 @@
end
end
[@hm_pos, @ht_pos, @hemi_pos]
end
- # we are only dealing with single element hashes
- # so discard hashes with more than one element and empty hashes
- # empty hash results from position below selected coverage or bases freq below noise
+ # mut_bulk and bg_bulk pileups are compared at selected position of the contig.
+ # Empty hash results from position below selected coverage
+ # or bases freq below noise and such positions are deleted.
+ # @param pos [Integer] position in the contig
+ # stores variant type, position and allele fraction to either @hm_pos or @ht_pos hashes
def compare_pileup(pos)
base_hash = @mut_bulk[pos].var_base_frac
base_hash.delete(:ref)
return nil if base_hash.empty?
# we could ignore complex loci or
# take the variant type based on predominant base
if base_hash.length > 1
- mut_type, ratio = var_mode(base_hash.values.max)
+ fraction = base_hash.values.max
+ mut_type = var_mode(fraction)
else
- base = base_hash.keys[0]
- mut_type, ratio = var_mode(base_hash[base])
+ fraction = base_hash[base_hash.keys[0]]
+ mut_type = var_mode(fraction)
end
if @bg_bulk.key?(pos)
bg_type = bg_bulk_var(pos)
mut_type = compare_var_type(mut_type, bg_type)
end
unless mut_type == nil
- categorise_pos(mut_type, pos, ratio)
+ categorise_pos(mut_type, pos, fraction)
end
end
- # if both bulks have homozygous var at this position
- # then ignore the position
+ # Categorizes variant zygosity based on the allele fraction provided.
+ # Uses lower and upper limit set for heterozygosity in the options.
+ # @note consider increasing the range of heterozygosity limits for RNA-seq data
+ # @param fraction [Float] allele fraction
+ # @return [Symbol] of either :het or :hom to represent heterozygous or homozygous respectively
+ def var_mode(fraction)
+ ht_low = Options.htlow
+ ht_high = Options.hthigh
+ mode = ''
+ if fraction.between?(ht_low, ht_high)
+ mode = :het
+ elsif fraction > ht_high
+ mode = :hom
+ end
+ mode
+ end
+
+ # Simple comparison of variant type of mut and bg bulks at a position
+ # If both bulks have homozygous variant at selected position then it is ignored
+ # @param muttype [Symbol] values are either :hom or :het
+ # @param bgtype [Symbol] values are either :hom or :het
+ # @return [Symbol] variant mode of the mut bulk (:hom or :het) at the position or nil
def compare_var_type(muttype, bgtype)
if muttype == :hom and bgtype == :hom
nil
else
muttype
end
end
+ # Method to extract var_mode from pileup information at a position in contig
+ #
+ # @param pos [Integer] position in the contig
+ # @return [Symbol] variant mode of the background bulk (:hom or :het) at the position
def bg_bulk_var(pos)
bg_base_hash = @bg_bulk[pos].var_base_frac
if bg_base_hash.length > 1
# taking only var mode
- var_mode(bg_base_hash.values.max)[0]
+ var_mode(bg_base_hash.values.max)
else
# taking only var mode
- var_mode(bg_base_hash[0])[0]
+ var_mode(bg_base_hash[0])
end
end
+ # method stores pos as key and allele fraction as value
+ # to @hm_pos or @ht_pos hash based on variant type
+ # @param var_type [Symbol] values are either :hom or :het
+ # @param pos [Integer] position in the contig
+ # @param ratio [Float] allele fraction
def categorise_pos(var_type, pos, ratio)
if var_type == :hom
@hm_pos[pos] = ratio
elsif var_type == :het
@ht_pos[pos] = ratio
end
end
- # calculate var zygosity for non-polyploid variants
- # increased range is used for heterozygosity for RNA-seq data
- def var_mode(ratio)
- ht_low = Options.params.htlow
- ht_high = Options.params.hthigh
- mode = ''
- if ratio.between?(ht_low, ht_high)
- mode = :het
- elsif ratio > ht_high
- mode = :hom
- end
- [mode, ratio]
- end
-
+ # Compares parental pileups for the contig and identify position
+ # that indicate variants from homelogues called hemi-snps
+ # and calculates bulk frequency ratio (bfr)
+ # @return [Hash] parent_hemi hash with position as key and bfr as value
def hemisnps_in_parent
# mark all the hemi snp based on both parents
self.mut_parent.each_key do |pos|
mut_parent_frac = @mut_parent[pos].var_base_frac
if self.bg_parent.key?(pos)