lib/cheripic/variants.rb in cheripic-1.0.0 vs lib/cheripic/variants.rb in cheripic-1.1.0
- old
+ new
@@ -2,19 +2,37 @@
require 'bio'
require 'forwardable'
module Cheripic
+ # Custom error handling for Variants class
class VariantsError < CheripicError; end
+ # A Variants object for each analysis pipeline that stores
+ # assembly details and extracts pileups for each contig
+ # assembly and pileup details are stored as
+ # hashes of Contig and ContigPileups objects
+ #
+ # @!attribute [r] assembly
+ # @return [Hash] a hash of contig ids from assembly as keys and respective Contig objects as values
+ # @!attribute [r] pileups
+ # @return [Hash] a hash of contig ids from assembly as keys and respective ContigPileups objects as values
+ # @!attribute [r] hmes_frags
+ # @return [Hash] a hash of contigs with selected hme score, a subset of assembly hash
+ # @!attribute [r] bfr_frags
+ # @return [Hash] a hash of contigs with selected bfr score, a subset of assembly hash
+ # @!attribute [r] pileups_analyzed
+ # @return [Boolean] a Boolean option to check if pileups for the assembly are analyzed or not
class Variants
include Enumerable
extend Forwardable
def_delegators :@assembly, :each, :each_key, :each_value, :size, :length, :[]
- attr_accessor :assembly, :has_run, :pileups, :hmes_frags, :bfr_frags
+ attr_reader :assembly, :pileups, :hmes_frags, :bfr_frags, :pileups_analyzed
+ # creates a Variants object using user input files
+ # @param options [Hash] a hash of required input files as keys and file paths as values
def initialize(options)
@params = options
@assembly = {}
@pileups = {}
Bio::FastaFormat.open(@params.assembly).each do |entry|
@@ -29,14 +47,15 @@
raise VariantsError
end
@assembly[contig.id] = contig
@pileups[contig.id] = ContigPileups.new(contig.id)
end
+ @pileups_analyzed = false
end
- # Read and store pileup data for each bulk and parents
- #
+ # Reads and store pileup data for each of input bulk and parents pileup files
+ # And sets pileups_analyzed to true that pileups files are processed
def analyse_pileups
@bg_bulk = @params.bg_bulk
@mut_parent = @params.mut_parent
@bg_parent = @params.bg_parent
@@ -45,13 +64,17 @@
if infile != ''
extract_pileup(infile, input)
end
end
- @has_run = true
+ @pileups_analyzed = true
end
+ # Input pileup file is read and positions are selected that pass the thresholds
+ # @param pileupfile [String] path to the pileup file to read
+ # @param sym [Symbol] Symbol of the pileup file used to write selected variants
+ # pileup information to respective ContigPileups object
def extract_pileup(pileupfile, sym)
# read mpileup file and process each variant
File.foreach(pileupfile) do |line|
pileup = Pileup.new(line)
if pileup.is_var
@@ -59,12 +82,16 @@
contig_obj.send(sym).store(pileup.pos, pileup)
end
end
end
+ # Once pileup files are analysed and variants are extracted from each bulk;
+ # bulks are compared to identify and isolate variants for downstream analysis.
+ # If polyploidy set to trye and mut_parent and bg_parent bulks are provided
+ # hemisnps in parents are extracted for bulk frequency ratio analysis
def compare_pileups
- unless defined?(@has_run)
+ unless @pileups_analyzed
self.analyse_pileups
end
@assembly.each_key do | id |
contig = @assembly[id]
# extract parental hemi snps for polyploids before bulks are compared
@@ -73,30 +100,39 @@
end
contig.hm_pos, contig.ht_pos, contig.hemi_pos = @pileups[id].bulks_compared
end
end
+ # From Assembly contig objects, contigs are selected based on user selected options
+ # for homozygosity enrichment score (hme_score)
def hmes_frags
# calculate every time method gets called
@hmes_frags = select_contigs(:hme_score)
end
+ # From Assembly contig objects, contigs are selected based on user selected options
+ # for bulk frequency ratio (bfr_score)
def bfr_frags
unless defined?(@bfr_frags)
@bfr_frags = select_contigs(:bfr_score)
end
@bfr_frags
end
+ # Applies selection procedure on assembly contigs based on the ratio_type provided.
+ # If only_frag_with_vars is set to true then contigs without any variant are discarded for :hme_score
+ # while contigs without any hemisnps are discarded for :bfr_score
+ # If filter_out_low_hmes is set to true then contigs are further filtered based on a cut off value of the score
+ # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
def select_contigs(ratio_type)
selected_contigs ={}
- only_frag_with_vars = Options.params.only_frag_with_vars
+ only_frag_with_vars = Options.only_frag_with_vars
@assembly.each_key do | frag |
if only_frag_with_vars
if ratio_type == :hme_score
# selecting fragments which have a variant
- if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.params.hmes_adjust
+ if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.hmes_adjust
selected_contigs[frag] = @assembly[frag]
end
else # ratio_type == :bfr_score
# in polyploidy scenario selecting fragments with at least one bfr position
if @assembly[frag].hemi_num > 0
@@ -114,29 +150,37 @@
logger.info "No filtering was applied to fragments\n"
end
selected_contigs
end
+ # Filters out contigs below a cutoff for selected ratio_type
+ # a cutoff value is calculated based on ratio_type provided
+ # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
+ # @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash
def filter_contigs(selected_contigs, ratio_type)
cutoff = get_cutoff(selected_contigs, ratio_type)
selected_contigs.each_key do | frag |
if selected_contigs[frag].send(ratio_type) < cutoff
selected_contigs.delete(frag)
end
end
selected_contigs
end
+ # Cut off value calculation used to filter out low scored contigs.
+ #
+ # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
+ # @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash
def get_cutoff(selected_contigs, ratio_type)
- filter_out_low_hmes = Options.params.filter_out_low_hmes
+ filter_out_low_hmes = Options.filter_out_low_hmes
# set minimum cut off hme_score or bfr_score to pick fragments with variants
# calculate min hme score for back or out crossed data or bfr_score for polypoidy data
# if no filtering applied set cutoff to 1.1
if filter_out_low_hmes
if ratio_type == :hme_score
- adjust = Options.params.hmes_adjust
- if Options.params.cross_type == 'back'
+ adjust = Options.hmes_adjust
+ if Options.cross_type == 'back'
cutoff = (1.0/adjust) + 1.0
else # outcross
cutoff = (2.0/adjust) + 1.0
end
else # ratio_type is bfr_score
@@ -146,10 +190,13 @@
cutoff = 0.0
end
cutoff
end
+ # Cut off value calculation for bfr contigs.
+ # ratio value at index 0.1% length of an array or at index zero of an array that contains decreasing order of bfr ratios
+ # @param selected_contigs [Hash] a hash of contigs with selected bfr score, a subset of assembly hash
def bfr_cutoff(selected_contigs, prop=0.1)
ratios = []
selected_contigs.each_key do | frag |
ratios << selected_contigs[frag].bfr_score
end
@@ -160,11 +207,11 @@
index = 1
end
ratios[index - 1]
end
- # method is to discard homozygous variant positions for which background bulk
- # pileup shows proportion higher than 0.35 for variant allele/non-reference allele
+ # Method is to discard homozygous variant positions for which background bulk
+ # pileup shows a fraction value higher than 0.35 for variant allele/non-reference allele
# a recessive variant is expected to have 1/3rd frequency in background bulk
def verify_bg_bulk_pileup
unless defined?(@hmes_frags)
self.hmes_frags
end