lib/cheripic/cmd.rb in cheripic-1.2.0 vs lib/cheripic/cmd.rb in cheripic-1.2.5

- old
+ new

@@ -50,14 +50,18 @@ :type => String, :default => 'pileup' opt :mut_bulk, 'Pileup or sorted BAM file alignments from mutant/trait of interest bulk 1', :short => '-a', :type => String + opt :mut_bulk_vcf, 'vcf file for variants from mutant/trait of interest bulk 1', + :type => String opt :bg_bulk, 'Pileup or sorted BAM file alignments from background/wildtype bulk 2', :short => '-b', :type => String - opt :output, 'Directory to store results, will be created if not existing', + opt :bg_bulk_vcf, 'vcf file for variants from background/wildtype bulk 2', + :type => String + opt :output, 'custom name tag to include in the output file name', :default => 'cheripic_results' opt :loglevel, 'Choose any one of "info / warn / debug" level for logs generated', :default => 'debug' opt :hmes_adjust, 'factor added to snp count of each contig to adjust for hme score calculations', :type => Float, @@ -66,13 +70,21 @@ :type => Float, :default => 0.2 opt :hthigh, 'high level for categorizing heterozygosity', :type => Float, :default => 0.9 - opt :mindepth, 'minimum read depth to conisder a position for variant calls', + opt :mindepth, 'minimum read depth at a position to consider for variant calls', :type => Integer, :default => 6 + opt :max_d_multiple, "multiplication factor for average coverage to calculate maximum read coverage +if set zero no calculation will be made from bam file.\nsetting this value will override user set max depth", + :type => Integer, + :default => 5 + opt :maxdepth, "maximum read depth at a position to consider for variant calls +if set to zero no user max depth will be used", + :type => Integer, + :default => 0 opt :min_non_ref_count, 'minimum read depth supporting non reference base at each position', :type => Integer, :default => 3 opt :min_indel_count_support, 'minimum read depth supporting an indel at each position', :type => Integer, @@ -95,11 +107,12 @@ :type => String, :default => 'back' opt :use_all_contigs, 'option to select all contigs or only contigs containing variants for analysis', :type => FalseClass, :default => false - opt :include_low_hmes, 'option to include or discard variants from contigs with low hme-score or bfr score to list in the final output', + opt :include_low_hmes, 'option to include or discard variants from contigs with +low hme-score or bfr score to list in the final output', :type => FalseClass, :default => false opt :polyploidy, 'Set if the data input is from polyploids', :type => FalseClass, :default => false @@ -109,10 +122,14 @@ :default => '' opt :bg_parent, 'Pileup or sorted BAM file alignments from background/wildtype parent', :short => '-r', :type => String, :default => '' + opt :repeats_file, 'repeat masker output file for the assembly ', + :short => '-R', + :type => String, + :default => '' opt :bfr_adjust, 'factor added to hemi snp frequency of each parent to adjust for bfr calculations', :type => Float, :default => 0.05 opt :sel_seq_len, 'sequence length to print from either side of selected variants', :type => Integer, @@ -131,12 +148,13 @@ Description: Candidate mutation and closely linked marker selection for non reference genomes Uses bulk segregant data from non-reference sequence genomes Inputs: 1. Needs a reference fasta file of asssembly use for variant analysis - 2. Pileup files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks - 3. If polyploid species, include of pileup from one or both parents + 2. Pileup/Bam files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks + 3. If providing bam files, you have to include vcf files for the respective bulks + 4. If polyploid species, include pileup/bam files from one or both parents USAGE: cheripic <options> OPTIONS: @@ -147,65 +165,91 @@ # examples to display from command line def print_examples msg = <<-EOS - Cheripic v#{Cheripic::VERSION.dup} + Cheripic v#{Cheripic::VERSION.dup} + Authors: Shyam Rallapalli and Dan MacLean - EXAMPLE COMMANDS: - 1. cheripic -f assembly.fa -a mutbulk.pileup -b bgbulk.pileup --output=cheripic_output - 2. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup - --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true --output cheripic_results - 3. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup - --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true - --no-only-frag-with-vars --no-filter-out-low-hmes --output cheripic_results + EXAMPLE COMMANDS: + 1. cheripic -f assembly.fa -a mutbulk.pileup -b bgbulk.pileup --output=cheripic_output + 2. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup + --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true --output cheripic_results + 3. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup + --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true + --no-only-frag-with-vars --no-filter-out-low-hmes --output cheripic_results + 4. cheripic -h or cheripic --help + 5. cheripic -v or cheripic --version + EOS puts msg.split("\n").map{ |line| line.lstrip }.join("\n") exit(0) end # calls other methods to check if command line inputs are valid def check_arguments - check_output_dir + check_output check_log_level - check_input_files + check_input_types end - # TODO: check bulk input types and process associated files - # def check_input_types - # if @options[:input_format] == 'vcf' - # - # end - # end - - # checks if input files are valid - def check_input_files + # checks input files based on bulk file type + def check_input_types + inputfiles = {} + inputfiles[:required] = %i{assembly mut_bulk} + inputfiles[:optional] = %i{bg_bulk} + if @options[:input_format] == 'bam' + inputfiles[:required] << %i{mut_bulk_vcf} + inputfiles[:optional] << %i{bg_bulk_vcf} + end if @options[:polyploidy] - inputfiles = %i{assembly mut_bulk bg_bulk mut_parent bg_parent} - else - inputfiles = %i{assembly mut_bulk bg_bulk} + inputfiles[:either] = %i{mut_parent bg_parent} end - inputfiles.each do | symbol | - if @options[symbol] - file = @options[symbol] - @options[symbol] = File.expand_path(file) - unless File.exist?(file) - raise CheripicIOError.new "#{symbol} file, #{file} does not exist: " + check_input_files(inputfiles) + end + + # checks if input files are valid + def check_input_files(inputfiles) + check = 0 + inputfiles.each_key do | type | + inputfiles[type].flatten! + inputfiles[type].each do | symbol | + if @options[symbol] + file = @options[symbol] + @options[symbol] = File.expand_path(file) + next if type == :optional + if type == :required and not File.exist?(file) + raise CheripicIOError.new "#{symbol} file, #{file} does not exist: " + elsif type == :either and File.exist?(file) + check = 1 + end + elsif type == :required + raise CheripicArgError.new "Options #{inputfiles}, all must be specified. " + + 'Try --help for further help.' end - else - raise CheripicArgError.new "Options #{inputfiles}, all must be specified. " + - 'Try --help for help.' end + if type == :either and check == 0 + raise CheripicArgError.new "One of the options #{inputfiles}, must be specified. " + + 'Try --help for further help.' + end end end - # checks if output directory already exists - def check_output_dir - if Dir.exist?(@options[:output]) - raise CheripicArgError.new "#{@options[:output]} directory exists" + - 'please choose a different output directory name' + # checks if files with output tag name already exists + def check_output + if (@options[:output].split('') & %w{# / : * ? ' < > | & $ ,}).any? + raise CheripicArgError.new 'please choose a name tag that contains ' + + 'alphanumeric characters, hyphen(-) and underscore(_) only' end + @options[:hmes_frags] = "#{@options[:output]}_selected_hme_variants.txt" + @options[:bfr_frags] = "#{@options[:output]}_selected_bfr_variants.txt" + [@options[:hmes_frags], @options[:bfr_frags]].each do | file | + if File.exist?(file) + raise CheripicArgError.new "'#{file}' file exists " + + 'please choose a different name tag to be included in the output file name' + end + end end # checks and sets logger level def check_log_level unless %w(error info warn debug).include?(@options[:loglevel]) @@ -218,10 +262,11 @@ # Initializes an Implementer object using input options # and calls run method of the Implementer to start the pipeline # A hash of trollop option names as keys and user or default # setting as values is passed to Implementer object def run - @options[:output] = File.expand_path @options[:output] + @options[:hmes_frags] = File.expand_path @options[:hmes_frags] + @options[:bfr_frags] = File.expand_path @options[:bfr_frags] analysis = Implementer.new(@options) analysis.run end end # Cmd