lib/cheripic/contig_pileups.rb in cheripic-1.2.0 vs lib/cheripic/contig_pileups.rb in cheripic-1.2.5

- old
+ new

@@ -30,31 +30,42 @@ def_delegators :@mut_bulk, :each, :each_key, :each_value, :length, :[], :store def_delegators :@bg_bulk, :each, :each_key, :each_value, :length, :[], :store def_delegators :@mut_parent, :each, :each_key, :each_value, :length, :[], :store def_delegators :@bg_parent, :each, :each_key, :each_value, :length, :[], :store attr_accessor :id, :parent_hemi - attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent + attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent, :masked_regions # creates a ContigPileup object using fasta entry id # @param fasta [String] a contig id from fasta entry def initialize (fasta) @id = fasta @mut_bulk = {} @bg_bulk = {} @mut_parent = {} @bg_parent = {} @parent_hemi = {} + @masked_regions = Hash.new { |h,k| h[k] = {} } + @hm_pos = {} + @ht_pos = {} + @hemi_pos = {} end # bulk pileups are compared and variant positions are selected # @return [Array<Hash>] variant positions are stored in hashes # for homozygous, heterozygous and hemi-variant positions def bulks_compared - @hm_pos = {} - @ht_pos = {} - @hemi_pos = {} @mut_bulk.each_key do | pos | + ignore = 0 + unless @masked_regions.empty? + @masked_regions.each_key do | index | + if pos.between?(@masked_regions[index][:begin], @masked_regions[index][:end]) + ignore = 1 + logger.info "variant is in the masked region\t#{@mut_bulk[pos].to_s}" + end + end + end + next if ignore == 1 if Options.polyploidy and @parent_hemi.key?(pos) bg_bases = '' if @bg_bulk.key?(pos) bg_bases = @bg_bulk[pos].var_base_frac end @@ -72,31 +83,41 @@ # Empty hash results from position below selected coverage # or bases freq below noise and such positions are deleted. # @param pos [Integer] position in the contig # stores variant type, position and allele fraction to either @hm_pos or @ht_pos hashes def compare_pileup(pos) - base_hash = @mut_bulk[pos].var_base_frac - base_hash.delete(:ref) - return nil if base_hash.empty? - # we could ignore complex loci or - # take the variant type based on predominant base - if base_hash.length > 1 - fraction = base_hash.values.max - mut_type = var_mode(fraction) - else - fraction = base_hash[base_hash.keys[0]] - mut_type = var_mode(fraction) - end + mut_type, fraction = var_mode_fraction(@mut_bulk[pos]) + return nil if mut_type.nil? if @bg_bulk.key?(pos) - bg_type = bg_bulk_var(pos) + bg_type = var_mode_fraction(@bg_bulk[pos])[0] mut_type = compare_var_type(mut_type, bg_type) end - unless mut_type == nil + unless mut_type.nil? categorise_pos(mut_type, pos, fraction) end end + + # Method to extract var_mode and allele fraction from pileup information at a position in contig + # + # @param pileup_info [Pileup] pileup object + # @return [Symbol] variant mode from pileup position (:hom or :het) at the position + # @return [Float] allele fraction at the position + def var_mode_fraction(pileup_info) + base_frac_hash = pileup_info.var_base_frac + base_frac_hash.delete(:ref) + return [nil, nil] if base_frac_hash.empty? + # we could ignore complex loci or + # take the variant type based on predominant base + if base_frac_hash.length > 1 + fraction = base_frac_hash.values.max + else + fraction = base_frac_hash[base_frac_hash.keys[0]] + end + [var_mode(fraction), fraction] + end + # Categorizes variant zygosity based on the allele fraction provided. # Uses lower and upper limit set for heterozygosity in the options. # @note consider increasing the range of heterozygosity limits for RNA-seq data # @param fraction [Float] allele fraction # @return [Symbol] of either :het or :hom to represent heterozygous or homozygous respectively @@ -123,27 +144,10 @@ else muttype end end - # Method to extract var_mode from pileup information at a position in contig - # - # @param pos [Integer] position in the contig - # @return [Symbol] variant mode of the background bulk (:hom or :het) at the position - def bg_bulk_var(pos) - bg_base_hash = @bg_bulk[pos].var_base_frac - bg_base_hash.delete(:ref) - return nil if bg_base_hash.empty? - if bg_base_hash.length > 1 - # taking only var mode - var_mode(bg_base_hash.values.max) - else - # taking only var mode - var_mode(bg_base_hash[bg_base_hash.keys[0]]) - end - end - # method stores pos as key and allele fraction as value # to @hm_pos or @ht_pos hash based on variant type # @param var_type [Symbol] values are either :hom or :het # @param pos [Integer] position in the contig # @param ratio [Float] allele fraction @@ -154,29 +158,29 @@ @ht_pos[pos] = ratio end end # Compares parental pileups for the contig and identify position - # that indicate variants from homelogues called hemi-snps + # that indicate variants from homeologues called hemi-snps # and calculates bulk frequency ratio (bfr) # @return [Hash] parent_hemi hash with position as key and bfr as value def hemisnps_in_parent # mark all the hemi snp based on both parents - self.mut_parent.each_key do |pos| + @mut_parent.each_key do |pos| mut_parent_frac = @mut_parent[pos].var_base_frac - if self.bg_parent.key?(pos) + if @bg_parent.key?(pos) bg_parent_frac = @bg_parent[pos].var_base_frac bfr = Bfr.get_bfr(mut_parent_frac, bg_parent_frac) @parent_hemi[pos] = bfr - self.bg_parent.delete(pos) + @bg_parent.delete(pos) else bfr = Bfr.get_bfr(mut_parent_frac) @parent_hemi[pos] = bfr end end # now include all hemi snp unique to background parent - self.bg_parent.each_key do |pos| + @bg_parent.each_key do |pos| unless @parent_hemi.key?(pos) bg_parent_frac = @bg_parent[pos].var_base_frac bfr = Bfr.get_bfr(bg_parent_frac) @parent_hemi[pos] = bfr end