lib/cheripic/variants.rb in cheripic-1.0.0 vs lib/cheripic/variants.rb in cheripic-1.1.0

- old
+ new

@@ -2,19 +2,37 @@ require 'bio' require 'forwardable' module Cheripic + # Custom error handling for Variants class class VariantsError < CheripicError; end + # A Variants object for each analysis pipeline that stores + # assembly details and extracts pileups for each contig + # assembly and pileup details are stored as + # hashes of Contig and ContigPileups objects + # + # @!attribute [r] assembly + # @return [Hash] a hash of contig ids from assembly as keys and respective Contig objects as values + # @!attribute [r] pileups + # @return [Hash] a hash of contig ids from assembly as keys and respective ContigPileups objects as values + # @!attribute [r] hmes_frags + # @return [Hash] a hash of contigs with selected hme score, a subset of assembly hash + # @!attribute [r] bfr_frags + # @return [Hash] a hash of contigs with selected bfr score, a subset of assembly hash + # @!attribute [r] pileups_analyzed + # @return [Boolean] a Boolean option to check if pileups for the assembly are analyzed or not class Variants include Enumerable extend Forwardable def_delegators :@assembly, :each, :each_key, :each_value, :size, :length, :[] - attr_accessor :assembly, :has_run, :pileups, :hmes_frags, :bfr_frags + attr_reader :assembly, :pileups, :hmes_frags, :bfr_frags, :pileups_analyzed + # creates a Variants object using user input files + # @param options [Hash] a hash of required input files as keys and file paths as values def initialize(options) @params = options @assembly = {} @pileups = {} Bio::FastaFormat.open(@params.assembly).each do |entry| @@ -29,14 +47,15 @@ raise VariantsError end @assembly[contig.id] = contig @pileups[contig.id] = ContigPileups.new(contig.id) end + @pileups_analyzed = false end - # Read and store pileup data for each bulk and parents - # + # Reads and store pileup data for each of input bulk and parents pileup files + # And sets pileups_analyzed to true that pileups files are processed def analyse_pileups @bg_bulk = @params.bg_bulk @mut_parent = @params.mut_parent @bg_parent = @params.bg_parent @@ -45,13 +64,17 @@ if infile != '' extract_pileup(infile, input) end end - @has_run = true + @pileups_analyzed = true end + # Input pileup file is read and positions are selected that pass the thresholds + # @param pileupfile [String] path to the pileup file to read + # @param sym [Symbol] Symbol of the pileup file used to write selected variants + # pileup information to respective ContigPileups object def extract_pileup(pileupfile, sym) # read mpileup file and process each variant File.foreach(pileupfile) do |line| pileup = Pileup.new(line) if pileup.is_var @@ -59,12 +82,16 @@ contig_obj.send(sym).store(pileup.pos, pileup) end end end + # Once pileup files are analysed and variants are extracted from each bulk; + # bulks are compared to identify and isolate variants for downstream analysis. + # If polyploidy set to trye and mut_parent and bg_parent bulks are provided + # hemisnps in parents are extracted for bulk frequency ratio analysis def compare_pileups - unless defined?(@has_run) + unless @pileups_analyzed self.analyse_pileups end @assembly.each_key do | id | contig = @assembly[id] # extract parental hemi snps for polyploids before bulks are compared @@ -73,30 +100,39 @@ end contig.hm_pos, contig.ht_pos, contig.hemi_pos = @pileups[id].bulks_compared end end + # From Assembly contig objects, contigs are selected based on user selected options + # for homozygosity enrichment score (hme_score) def hmes_frags # calculate every time method gets called @hmes_frags = select_contigs(:hme_score) end + # From Assembly contig objects, contigs are selected based on user selected options + # for bulk frequency ratio (bfr_score) def bfr_frags unless defined?(@bfr_frags) @bfr_frags = select_contigs(:bfr_score) end @bfr_frags end + # Applies selection procedure on assembly contigs based on the ratio_type provided. + # If only_frag_with_vars is set to true then contigs without any variant are discarded for :hme_score + # while contigs without any hemisnps are discarded for :bfr_score + # If filter_out_low_hmes is set to true then contigs are further filtered based on a cut off value of the score + # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score def select_contigs(ratio_type) selected_contigs ={} - only_frag_with_vars = Options.params.only_frag_with_vars + only_frag_with_vars = Options.only_frag_with_vars @assembly.each_key do | frag | if only_frag_with_vars if ratio_type == :hme_score # selecting fragments which have a variant - if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.params.hmes_adjust + if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.hmes_adjust selected_contigs[frag] = @assembly[frag] end else # ratio_type == :bfr_score # in polyploidy scenario selecting fragments with at least one bfr position if @assembly[frag].hemi_num > 0 @@ -114,29 +150,37 @@ logger.info "No filtering was applied to fragments\n" end selected_contigs end + # Filters out contigs below a cutoff for selected ratio_type + # a cutoff value is calculated based on ratio_type provided + # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score + # @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash def filter_contigs(selected_contigs, ratio_type) cutoff = get_cutoff(selected_contigs, ratio_type) selected_contigs.each_key do | frag | if selected_contigs[frag].send(ratio_type) < cutoff selected_contigs.delete(frag) end end selected_contigs end + # Cut off value calculation used to filter out low scored contigs. + # + # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score + # @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash def get_cutoff(selected_contigs, ratio_type) - filter_out_low_hmes = Options.params.filter_out_low_hmes + filter_out_low_hmes = Options.filter_out_low_hmes # set minimum cut off hme_score or bfr_score to pick fragments with variants # calculate min hme score for back or out crossed data or bfr_score for polypoidy data # if no filtering applied set cutoff to 1.1 if filter_out_low_hmes if ratio_type == :hme_score - adjust = Options.params.hmes_adjust - if Options.params.cross_type == 'back' + adjust = Options.hmes_adjust + if Options.cross_type == 'back' cutoff = (1.0/adjust) + 1.0 else # outcross cutoff = (2.0/adjust) + 1.0 end else # ratio_type is bfr_score @@ -146,10 +190,13 @@ cutoff = 0.0 end cutoff end + # Cut off value calculation for bfr contigs. + # ratio value at index 0.1% length of an array or at index zero of an array that contains decreasing order of bfr ratios + # @param selected_contigs [Hash] a hash of contigs with selected bfr score, a subset of assembly hash def bfr_cutoff(selected_contigs, prop=0.1) ratios = [] selected_contigs.each_key do | frag | ratios << selected_contigs[frag].bfr_score end @@ -160,11 +207,11 @@ index = 1 end ratios[index - 1] end - # method is to discard homozygous variant positions for which background bulk - # pileup shows proportion higher than 0.35 for variant allele/non-reference allele + # Method is to discard homozygous variant positions for which background bulk + # pileup shows a fraction value higher than 0.35 for variant allele/non-reference allele # a recessive variant is expected to have 1/3rd frequency in background bulk def verify_bg_bulk_pileup unless defined?(@hmes_frags) self.hmes_frags end