lib/cheripic/contig_pileups.rb in cheripic-1.2.0 vs lib/cheripic/contig_pileups.rb in cheripic-1.2.5
- old
+ new
@@ -30,31 +30,42 @@
def_delegators :@mut_bulk, :each, :each_key, :each_value, :length, :[], :store
def_delegators :@bg_bulk, :each, :each_key, :each_value, :length, :[], :store
def_delegators :@mut_parent, :each, :each_key, :each_value, :length, :[], :store
def_delegators :@bg_parent, :each, :each_key, :each_value, :length, :[], :store
attr_accessor :id, :parent_hemi
- attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent
+ attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent, :masked_regions
# creates a ContigPileup object using fasta entry id
# @param fasta [String] a contig id from fasta entry
def initialize (fasta)
@id = fasta
@mut_bulk = {}
@bg_bulk = {}
@mut_parent = {}
@bg_parent = {}
@parent_hemi = {}
+ @masked_regions = Hash.new { |h,k| h[k] = {} }
+ @hm_pos = {}
+ @ht_pos = {}
+ @hemi_pos = {}
end
# bulk pileups are compared and variant positions are selected
# @return [Array<Hash>] variant positions are stored in hashes
# for homozygous, heterozygous and hemi-variant positions
def bulks_compared
- @hm_pos = {}
- @ht_pos = {}
- @hemi_pos = {}
@mut_bulk.each_key do | pos |
+ ignore = 0
+ unless @masked_regions.empty?
+ @masked_regions.each_key do | index |
+ if pos.between?(@masked_regions[index][:begin], @masked_regions[index][:end])
+ ignore = 1
+ logger.info "variant is in the masked region\t#{@mut_bulk[pos].to_s}"
+ end
+ end
+ end
+ next if ignore == 1
if Options.polyploidy and @parent_hemi.key?(pos)
bg_bases = ''
if @bg_bulk.key?(pos)
bg_bases = @bg_bulk[pos].var_base_frac
end
@@ -72,31 +83,41 @@
# Empty hash results from position below selected coverage
# or bases freq below noise and such positions are deleted.
# @param pos [Integer] position in the contig
# stores variant type, position and allele fraction to either @hm_pos or @ht_pos hashes
def compare_pileup(pos)
- base_hash = @mut_bulk[pos].var_base_frac
- base_hash.delete(:ref)
- return nil if base_hash.empty?
- # we could ignore complex loci or
- # take the variant type based on predominant base
- if base_hash.length > 1
- fraction = base_hash.values.max
- mut_type = var_mode(fraction)
- else
- fraction = base_hash[base_hash.keys[0]]
- mut_type = var_mode(fraction)
- end
+ mut_type, fraction = var_mode_fraction(@mut_bulk[pos])
+ return nil if mut_type.nil?
if @bg_bulk.key?(pos)
- bg_type = bg_bulk_var(pos)
+ bg_type = var_mode_fraction(@bg_bulk[pos])[0]
mut_type = compare_var_type(mut_type, bg_type)
end
- unless mut_type == nil
+ unless mut_type.nil?
categorise_pos(mut_type, pos, fraction)
end
end
+
+ # Method to extract var_mode and allele fraction from pileup information at a position in contig
+ #
+ # @param pileup_info [Pileup] pileup object
+ # @return [Symbol] variant mode from pileup position (:hom or :het) at the position
+ # @return [Float] allele fraction at the position
+ def var_mode_fraction(pileup_info)
+ base_frac_hash = pileup_info.var_base_frac
+ base_frac_hash.delete(:ref)
+ return [nil, nil] if base_frac_hash.empty?
+ # we could ignore complex loci or
+ # take the variant type based on predominant base
+ if base_frac_hash.length > 1
+ fraction = base_frac_hash.values.max
+ else
+ fraction = base_frac_hash[base_frac_hash.keys[0]]
+ end
+ [var_mode(fraction), fraction]
+ end
+
# Categorizes variant zygosity based on the allele fraction provided.
# Uses lower and upper limit set for heterozygosity in the options.
# @note consider increasing the range of heterozygosity limits for RNA-seq data
# @param fraction [Float] allele fraction
# @return [Symbol] of either :het or :hom to represent heterozygous or homozygous respectively
@@ -123,27 +144,10 @@
else
muttype
end
end
- # Method to extract var_mode from pileup information at a position in contig
- #
- # @param pos [Integer] position in the contig
- # @return [Symbol] variant mode of the background bulk (:hom or :het) at the position
- def bg_bulk_var(pos)
- bg_base_hash = @bg_bulk[pos].var_base_frac
- bg_base_hash.delete(:ref)
- return nil if bg_base_hash.empty?
- if bg_base_hash.length > 1
- # taking only var mode
- var_mode(bg_base_hash.values.max)
- else
- # taking only var mode
- var_mode(bg_base_hash[bg_base_hash.keys[0]])
- end
- end
-
# method stores pos as key and allele fraction as value
# to @hm_pos or @ht_pos hash based on variant type
# @param var_type [Symbol] values are either :hom or :het
# @param pos [Integer] position in the contig
# @param ratio [Float] allele fraction
@@ -154,29 +158,29 @@
@ht_pos[pos] = ratio
end
end
# Compares parental pileups for the contig and identify position
- # that indicate variants from homelogues called hemi-snps
+ # that indicate variants from homeologues called hemi-snps
# and calculates bulk frequency ratio (bfr)
# @return [Hash] parent_hemi hash with position as key and bfr as value
def hemisnps_in_parent
# mark all the hemi snp based on both parents
- self.mut_parent.each_key do |pos|
+ @mut_parent.each_key do |pos|
mut_parent_frac = @mut_parent[pos].var_base_frac
- if self.bg_parent.key?(pos)
+ if @bg_parent.key?(pos)
bg_parent_frac = @bg_parent[pos].var_base_frac
bfr = Bfr.get_bfr(mut_parent_frac, bg_parent_frac)
@parent_hemi[pos] = bfr
- self.bg_parent.delete(pos)
+ @bg_parent.delete(pos)
else
bfr = Bfr.get_bfr(mut_parent_frac)
@parent_hemi[pos] = bfr
end
end
# now include all hemi snp unique to background parent
- self.bg_parent.each_key do |pos|
+ @bg_parent.each_key do |pos|
unless @parent_hemi.key?(pos)
bg_parent_frac = @bg_parent[pos].var_base_frac
bfr = Bfr.get_bfr(bg_parent_frac)
@parent_hemi[pos] = bfr
end