lib/cheripic/cmd.rb in cheripic-1.2.0 vs lib/cheripic/cmd.rb in cheripic-1.2.5
- old
+ new
@@ -50,14 +50,18 @@
:type => String,
:default => 'pileup'
opt :mut_bulk, 'Pileup or sorted BAM file alignments from mutant/trait of interest bulk 1',
:short => '-a',
:type => String
+ opt :mut_bulk_vcf, 'vcf file for variants from mutant/trait of interest bulk 1',
+ :type => String
opt :bg_bulk, 'Pileup or sorted BAM file alignments from background/wildtype bulk 2',
:short => '-b',
:type => String
- opt :output, 'Directory to store results, will be created if not existing',
+ opt :bg_bulk_vcf, 'vcf file for variants from background/wildtype bulk 2',
+ :type => String
+ opt :output, 'custom name tag to include in the output file name',
:default => 'cheripic_results'
opt :loglevel, 'Choose any one of "info / warn / debug" level for logs generated',
:default => 'debug'
opt :hmes_adjust, 'factor added to snp count of each contig to adjust for hme score calculations',
:type => Float,
@@ -66,13 +70,21 @@
:type => Float,
:default => 0.2
opt :hthigh, 'high level for categorizing heterozygosity',
:type => Float,
:default => 0.9
- opt :mindepth, 'minimum read depth to conisder a position for variant calls',
+ opt :mindepth, 'minimum read depth at a position to consider for variant calls',
:type => Integer,
:default => 6
+ opt :max_d_multiple, "multiplication factor for average coverage to calculate maximum read coverage
+if set zero no calculation will be made from bam file.\nsetting this value will override user set max depth",
+ :type => Integer,
+ :default => 5
+ opt :maxdepth, "maximum read depth at a position to consider for variant calls
+if set to zero no user max depth will be used",
+ :type => Integer,
+ :default => 0
opt :min_non_ref_count, 'minimum read depth supporting non reference base at each position',
:type => Integer,
:default => 3
opt :min_indel_count_support, 'minimum read depth supporting an indel at each position',
:type => Integer,
@@ -95,11 +107,12 @@
:type => String,
:default => 'back'
opt :use_all_contigs, 'option to select all contigs or only contigs containing variants for analysis',
:type => FalseClass,
:default => false
- opt :include_low_hmes, 'option to include or discard variants from contigs with low hme-score or bfr score to list in the final output',
+ opt :include_low_hmes, 'option to include or discard variants from contigs with
+low hme-score or bfr score to list in the final output',
:type => FalseClass,
:default => false
opt :polyploidy, 'Set if the data input is from polyploids',
:type => FalseClass,
:default => false
@@ -109,10 +122,14 @@
:default => ''
opt :bg_parent, 'Pileup or sorted BAM file alignments from background/wildtype parent',
:short => '-r',
:type => String,
:default => ''
+ opt :repeats_file, 'repeat masker output file for the assembly ',
+ :short => '-R',
+ :type => String,
+ :default => ''
opt :bfr_adjust, 'factor added to hemi snp frequency of each parent to adjust for bfr calculations',
:type => Float,
:default => 0.05
opt :sel_seq_len, 'sequence length to print from either side of selected variants',
:type => Integer,
@@ -131,12 +148,13 @@
Description: Candidate mutation and closely linked marker selection for non reference genomes
Uses bulk segregant data from non-reference sequence genomes
Inputs:
1. Needs a reference fasta file of asssembly use for variant analysis
- 2. Pileup files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
- 3. If polyploid species, include of pileup from one or both parents
+ 2. Pileup/Bam files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
+ 3. If providing bam files, you have to include vcf files for the respective bulks
+ 4. If polyploid species, include pileup/bam files from one or both parents
USAGE:
cheripic <options>
OPTIONS:
@@ -147,65 +165,91 @@
# Prints the version line, the authors and a list of example command
# lines to standard output, then ends the process with exit status 0.
# NOTE(review): example 3 uses --no-only-frag-with-vars and
# --no-filter-out-low-hmes; confirm these flags are still defined.
def print_examples
  examples = <<-EOS
    Cheripic v#{Cheripic::VERSION.dup}
    Authors: Shyam Rallapalli and Dan MacLean

    EXAMPLE COMMANDS:
      1. cheripic -f assembly.fa -a mutbulk.pileup -b bgbulk.pileup --output=cheripic_output
      2. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup
      --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true --output cheripic_results
      3. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup
      --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true
      --no-only-frag-with-vars --no-filter-out-low-hmes --output cheripic_results
      4. cheripic -h or cheripic --help
      5. cheripic -v or cheripic --version

  EOS
  # the heredoc is indented only for readability here; every line is
  # printed flush left
  flush_left = examples.split("\n").map(&:lstrip)
  puts flush_left.join("\n")
  exit(0)
end
# Entry point for command line validation. Runs the individual checks
# in a fixed order: output name tag, log level, then the input files.
def check_arguments
  check_output      # output name tag and resulting file names
  check_log_level   # requested logger level
  check_input_types # input files for the selected input format
end
- # TODO: check bulk input types and process associated files
- # def check_input_types
- # if @options[:input_format] == 'vcf'
- #
- # end
- # end
-
- # checks if input files are valid
- def check_input_files
# Builds the groups of input file options to validate and passes them
# to check_input_files.
#
# The groups are keyed by how they are treated:
#   :required - assembly and mutant bulk, plus the mutant bulk vcf when
#               the input format is 'bam'
#   :optional - background bulk, plus its vcf when the input format is 'bam'
#   :either   - mutant and background parents, added only when the
#               polyploidy option is set
def check_input_types
  inputfiles = {
    :required => %i{assembly mut_bulk},
    :optional => %i{bg_bulk}
  }
  if @options[:input_format] == 'bam'
    # append the symbols themselves; `<< %i{...}` would push a nested
    # array into the list
    inputfiles[:required] << :mut_bulk_vcf
    inputfiles[:optional] << :bg_bulk_vcf
  end
  inputfiles[:either] = %i{mut_parent bg_parent} if @options[:polyploidy]
  check_input_files(inputfiles)
end
+
# Validates the input file options listed in +inputfiles+ and replaces
# each given path in @options with its expanded form.
#
# inputfiles is a hash of option-name lists keyed by :required,
# :optional or :either (lists may be nested; they are flattened here
# without modifying the caller's arrays).
#
# Raises CheripicArgError when a :required option is not set, or when
# none of the :either options points to an existing file.
# Raises CheripicIOError when a :required option points to a file that
# does not exist.
def check_input_files(inputfiles)
  inputfiles.each do |type, names|
    symbols = Array(names).flatten
    found_existing = false
    symbols.each do |symbol|
      file = @options[symbol]
      unless file
        next unless type == :required
        raise CheripicArgError, "Options #{symbols.join(', ')}, all must be specified. " +
                                'Try --help for further help.'
      end
      # an option left at its '' default is "not given"; expanding it
      # would silently turn it into the current directory
      @options[symbol] = File.expand_path(file) unless file.empty?
      next if type == :optional
      if type == :required && !File.exist?(file)
        raise CheripicIOError, "#{symbol} file, #{file} does not exist: "
      elsif type == :either && File.exist?(file)
        found_existing = true
      end
    end
    next unless type == :either && !found_existing
    raise CheripicArgError, "One of the options #{symbols.join(', ')}, must be specified. " +
                            'Try --help for further help.'
  end
end
- # checks if output directory already exists
- def check_output_dir
- if Dir.exist?(@options[:output])
- raise CheripicArgError.new "#{@options[:output]} directory exists" +
- 'please choose a different output directory name'
# Validates the output name tag and derives the two result file names
# from it, storing them in @options[:hmes_frags] and @options[:bfr_frags].
#
# Raises CheripicArgError when the tag contains anything other than
# alphanumeric characters, hyphen or underscore (an empty tag is
# rejected too), or when either derived file already exists.
def check_output
  tag = @options[:output].to_s
  # whitelist exactly what the error message promises; a blacklist lets
  # spaces, quotes, ';', backslashes and similar characters slip through
  unless tag =~ /\A[A-Za-z0-9_-]+\z/
    raise CheripicArgError, 'please choose a name tag that contains ' +
                            'alphanumeric characters, hyphen(-) and underscore(_) only'
  end
  @options[:hmes_frags] = "#{tag}_selected_hme_variants.txt"
  @options[:bfr_frags] = "#{tag}_selected_bfr_variants.txt"
  [@options[:hmes_frags], @options[:bfr_frags]].each do |file|
    next unless File.exist?(file)
    raise CheripicArgError, "'#{file}' file exists " +
                            'please choose a different name tag to be included in the output file name'
  end
end
# checks and sets logger level
def check_log_level
unless %w(error info warn debug).include?(@options[:loglevel])
@@ -218,10 +262,11 @@
# Starts the pipeline: expands the two result file paths stored in the
# options, builds an Implementer from the options hash (trollop option
# names as keys, user or default settings as values) and calls its run
# method.
def run
  %i{hmes_frags bfr_frags}.each do |key|
    @options[key] = File.expand_path(@options[key])
  end
  Implementer.new(@options).run
end
end # Cmd