bin/polymarker.rb in bio-polyploid-tools-0.5.2 vs bin/polymarker.rb in bio-polyploid-tools-0.6.0
- old
+ new
@@ -34,10 +34,12 @@
options[:bucket_size] = 0
options[:bucket] = 1
options[:model] = "est2genome"
options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ;
options[:flanking_size] = 150;
+options[:variation_free_region] = 0
+options[:extract_found_contigs] = false
options[:primer_3_preferences] = {
:primer_product_size_range => "50-150" ,
:primer_max_size => 25 ,
:primer_lib_ambiguity_codes_consensus => 1,
:primer_liberal_base => 1,
@@ -58,10 +60,15 @@
end
opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
options[:snp_list] = o
end
+
+ opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line.\n\
+ requires --reference to get the sequence using a position") do |o|
+ options[:mutant_list] = o
+ end
opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
options[:reference] = o
end
@@ -69,22 +76,34 @@
options[:output_folder] = o
end
opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
options[:model] = o
- end
+ end
- opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two", "Function to decide the chromome arm") do |o|
+ opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two", "Function to decide the chromome arm") do |o|
options[:arm_selection] = arm_selection_functions[o.to_sym];
end
opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences] )
end
+ opts.on("-v", "--variation_free_region INT", "If present, avoid generating the common primer if there are homoeologous SNPs within the specified distance") do |o|
+ options[:variation_free_region] = o.to_i
+ end
+
+ opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
+ options[:extract_found_contigs] = true
+ end
+
+ opts.on("-P", "--primers_to_order")do
+ #TODO: have a string with the tails, optional.
+ options[:primers_to_order] = true
+ end
+
-
end.parse!
if options[:primer_3_preferences][:primer_product_size_range]
range = options[:primer_3_preferences][:primer_product_size_range]
range_arr = range.split("-")
@@ -106,12 +125,13 @@
original_name="A"
snp_in="B"
fasta_reference = nil
#test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
-test_file=options[:marker_list]
+test_file=options[:marker_list] if options[:marker_list]
test_file=options[:snp_list] if options[:snp_list]
+test_file=options[:mutant_list] if options[:mutant_list]
fasta_reference = options[:reference]
output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
output_folder= options[:output_folder] if options[:output_folder]
Dir.mkdir(output_folder)
#TODO Make this tmp files
@@ -120,16 +140,16 @@
exonerate_file="#{output_folder}/exonerate_tmp.tab"
primer_3_input="#{output_folder}/primer_3_input_temp"
primer_3_output="#{output_folder}/primer_3_output_temp"
exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
output_primers="#{output_folder}/primers.csv"
+output_to_order="#{output_folder}/primers_to_order.csv"
@status_file="#{output_folder}/status.txt"
primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model=options[:model]
-
def write_status(status)
f=File.open(@status_file, "a")
f.puts "#{Time.now.to_s},#{status}"
f.close
end
@@ -146,13 +166,10 @@
fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
fasta_reference_db.load_fai_entries
p "Fasta reference: #{fasta_reference}"
end
-
-
-
#1. Read all the SNP files
#chromosome = nil
write_status "Reading SNPs"
File.open(test_file) do | f |
f.each_line do | line |
@@ -165,17 +182,26 @@
entry = fasta_reference_db.index.region_for_entry(snp.gene)
if entry
region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
snp.template_sequence = fasta_reference_db.fetch_sequence(region)
else
- $stderr.puts "ERROR: Unable to find entry for #{snp.gene}"
+ write_status "WARN: Unable to find entry for #{snp.gene}"
end
-
+ elsif options[:mutant_list] and options[:reference] #List and fasta file
+ snp = Bio::PolyploidTools::SNPMutant.parse(line)
+ entry = fasta_reference_db.index.region_for_entry(snp.contig)
+ if entry
+ region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
+ snp.full_sequence = fasta_reference_db.fetch_sequence(region)
+ else
+ write_status "WARN: Unable to find entry for #{snp.gene}"
+ end
else
rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
end
rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
+
snp.snp_in = snp_in
snp.original_name = original_name
if snp.position
snps << snp
else
@@ -204,11 +230,11 @@
#3. Run exonerate on each of the possible chromosomes for the SNP
#puts chromosome
#chr_group = chromosome[0]
write_status "Searching markers in genome"
exo_f = File.open(exonerate_file, "w")
-contigs_f = File.open(temp_contigs, "w")
+contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
filename=path_to_contigs
puts filename
target=filename
fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
@@ -222,33 +248,34 @@
found_contigs.add(aln.target_id)
entry = fasta_file.index.region_for_entry(aln.target_id)
raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
region = entry.get_full_region
seq = fasta_file.fetch_sequence(region)
- contigs_f.puts(">#{aln.target_id}\n#{seq}")
+ contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
end
end
end
-exo_f.close()
-contigs_f.close()
+exo_f.close()
+contigs_f.close() if options[:extract_found_contigs]
#4. Load all the results from exonerate and get the input filename for primer3
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more cleaver
write_status "Reading best alignment on each chromosome"
container= Bio::PolyploidTools::ExonContainer.new
container.flanking_size=options[:flanking_size]
container.gene_models(temp_fasta_query)
-container.chromosomes(temp_contigs)
+container.chromosomes(fasta_reference)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
snps.each do |snp|
snp.container = container
snp.flanking_size = container.flanking_size
+ snp.variation_free_region = options[:variation_free_region]
container.add_snp(snp)
end
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity})
@@ -278,9 +305,11 @@
end
kasp_container.add_primers_file(primer_3_output) if added_exons > 0
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
+
+File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }
write_status "DONE"
rescue StandardError => e
write_status "ERROR\t#{e.message}"
raise e
\ No newline at end of file