bin/polymarker.rb in bio-polyploid-tools-0.8.4 vs bin/polymarker.rb in bio-polyploid-tools-0.8.5
- old
+ new
@@ -37,10 +37,11 @@
options[:genomes_count] = 3
options[:min_identity] = 90
options[:scoring] = :genome_specific
options[:database] = false
options[:filter_best] = false
+options[:max_hits] = 10
options[:aligner] = :exonerate
options[:primer_3_preferences] = {
:primer_product_size_range => "50-150" ,
@@ -55,97 +56,100 @@
OptionParser.new do |opts|
opts.banner = "Usage: polymarker.rb [options]"
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
+ tmp_str = o
+ arr = o.split(",")
+ if arr.size == 2
+ options[:arm_selection] = lambda do |contig_name|
+ separator, field = arr
+ field = field.to_i
+ ret = contig_name.split(separator)[field]
+ return ret
+ end
+ else
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
+ end
+ end
+
+ opts.on("-b", "--filter_best", "If set, only keep the best alignment for each chromosome") do
+ options[:filter_best] = true
+ end
+
opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
options[:path_to_contigs] = o
end
- opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
- options[:marker_list] = o
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
+ options[:database] = o
end
+ opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
+ options[:model] = o
+ end
+
opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
options[:genomes_count] = o.to_i
end
-
- opts.on("-b", "--filter_best", "If set, only keep the best alignment for each chromosome") do
- options[:filter_best] = true
- end
-
- opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
- options[:snp_list] = o
+ opts.on("-i", "--min_identity INT", "Minimum identity to consider a hit (default 90)") do |o|
+ options[:min_identity] = o.to_i
end
- opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line.\n\
- requires --reference to get the sequence using a position") do |o|
- options[:mutant_list] = o
+ opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
+ options[:marker_list] = o
end
-
- opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
- options[:reference] = o
- end
- opts.on("-i", "--min_identity INT", "Minimum identity to consider a hit (default 90)") do |o|
- options[:min_identity] = o.to_i
- end
-
opts.on("-o", "--output FOLDER", "Output folder") do |o|
options[:output_folder] = o
end
-
- opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
- options[:model] = o
- end
- opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
- tmp_str = o
- arr = o.split(",")
- if arr.size == 2
- options[:arm_selection] = lambda do |contig_name|
- separator, field = arr
- field = field.to_i
- ret = contig_name.split(separator)[field]
- return ret
- end
- else
- options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
- end
-
- end
-
opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences] )
end
+ opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
+ options[:reference] = o
+ end
+
+ opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
+ options[:snp_list] = o
+ end
+
+ opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line.\n\
+ requires --reference to get the sequence using a position") do |o|
+ options[:mutant_list] = o
+ end
+
opts.on("-v", "--variation_free_region INT", "If present, avoid generating the common primer if there are homoeologous SNPs within the specified distance") do |o|
options[:variation_free_region] = o.to_i
end
opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
options[:extract_found_contigs] = true
end
- opts.on("-P", "--primers_to_order", "If present, save a separate file with the primers with the KASP tails")do
- #TODO: have a string with the tails, optional.
- options[:primers_to_order] = true
+ opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: exonerate") do |o|
+ raise "Invalid aligner" unless o == "exonerate" or o == "blast"
+ options[:aligner] = o.to_sym
end
opts.on("-H", "--het_dels", "If present, change the scoring to give priority to: semi-specific, specific, non-specific") do
options[:scoring] = :het_dels
end
- opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: exonerate") do |o|
- raise "Invalid aligner" unless o == "exonerate" or o == "blast"
- options[:aligner] = o.to_sym
+ opts.on("-M", "--max_hits INT", "Maximum number of hits to consider a region as non repetitive. Markers with more than this number of hits will be ignored. (default #{options[:max_hits]})") do |o|
+ options[:max_hits] = o.to_i
end
- opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
- options[:database] = o
+ opts.on("-P", "--primers_to_order", "If present, save a separate file with the primers with the KASP tails")do
+ #TODO: have a string with the tails, optional.
+ options[:primers_to_order] = true
end
+
end.parse!
validate_files(options)
@@ -306,11 +310,11 @@
end
end
end
-Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
+Bio::DB::Blast.align({:max_hits=>options[:max_hits], :query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
end if options[:aligner] == :blast
Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
@@ -327,12 +331,13 @@
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more clever
write_status "Reading best alignment on each chromosome"
-container= Bio::PolyploidTools::ExonContainer.new
-container.flanking_size=options[:flanking_size]
+container = Bio::PolyploidTools::ExonContainer.new
+container.flanking_size = options[:flanking_size]
+container.max_hits = options[:max_hits]
container.gene_models(temp_fasta_query)
container.chromosomes(target)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
@@ -385,10 +390,10 @@
end
kasp_container.add_primers_file(primer_3_output) if added_exons > 0
-header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
+header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,is_repetitive,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }
write_status "DONE"