bin/polymarker.rb in bio-polyploid-tools-0.5.2 vs bin/polymarker.rb in bio-polyploid-tools-0.6.0

- old
+ new

@@ -34,10 +34,12 @@ options[:bucket_size] = 0 options[:bucket] = 1 options[:model] = "est2genome" options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ; options[:flanking_size] = 150; +options[:variation_free_region] = 0 +options[:extract_found_contigs] = false options[:primer_3_preferences] = { :primer_product_size_range => "50-150" , :primer_max_size => 25 , :primer_lib_ambiguity_codes_consensus => 1, :primer_liberal_base => 1, @@ -58,10 +60,15 @@ end opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o| options[:snp_list] = o end + + opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line.\n\ + requires --reference to get the sequence using a position") do |o| + options[:mutant_list] = o + end opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o| options[:reference] = o end @@ -69,22 +76,34 @@ options[:output_folder] = o end opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o| options[:model] = o - end + end - opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two", "Function to decide the chromome arm") do |o| + opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two", "Function to decide the chromome arm") do |o| options[:arm_selection] = arm_selection_functions[o.to_sym]; end opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o| options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences] ) end + opts.on("-v", "--variation_free_region INT", "If present, avoid generating the common primer if there are homoeologous SNPs within the specified distance") do |o| + options[:variation_free_region] = o.to_i + end + + opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o| + options[:extract_found_contigs] = true + end + + opts.on("-P", "--primers_to_order")do + #TODO: have a string with the tails, optional. + options[:primers_to_order] = true + end + - end.parse! if options[:primer_3_preferences][:primer_product_size_range] range = options[:primer_3_preferences][:primer_product_size_range] range_arr = range.split("-") @@ -106,12 +125,13 @@ original_name="A" snp_in="B" fasta_reference = nil #test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv" -test_file=options[:marker_list] +test_file=options[:marker_list] if options[:marker_list] test_file=options[:snp_list] if options[:snp_list] +test_file=options[:mutant_list] if options[:mutant_list] fasta_reference = options[:reference] output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}" output_folder= options[:output_folder] if options[:output_folder] Dir.mkdir(output_folder) #TODO Make this tmp files @@ -120,16 +140,16 @@ exonerate_file="#{output_folder}/exonerate_tmp.tab" primer_3_input="#{output_folder}/primer_3_input_temp" primer_3_output="#{output_folder}/primer_3_output_temp" exons_filename="#{output_folder}/exons_genes_and_contigs.fa" output_primers="#{output_folder}/primers.csv" +output_to_order="#{output_folder}/primers_to_order.csv" @status_file="#{output_folder}/status.txt" primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config') model=options[:model] - def write_status(status) f=File.open(@status_file, "a") f.puts "#{Time.now.to_s},#{status}" f.close end @@ -146,13 +166,10 @@ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference}) fasta_reference_db.load_fai_entries p "Fasta reference: #{fasta_reference}" end - - - #1. Read all the SNP files #chromosome = nil write_status "Reading SNPs" File.open(test_file) do | f | f.each_line do | line | @@ -165,17 +182,26 @@ entry = fasta_reference_db.index.region_for_entry(snp.gene) if entry region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region snp.template_sequence = fasta_reference_db.fetch_sequence(region) else - $stderr.puts "ERROR: Unable to find entry for #{snp.gene}" + write_status "WARN: Unable to find entry for #{snp.gene}" end - + elsif options[:mutant_list] and options[:reference] #List and fasta file + snp = Bio::PolyploidTools::SNPMutant.parse(line) + entry = fasta_reference_db.index.region_for_entry(snp.contig) + if entry + region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region + snp.full_sequence = fasta_reference_db.fetch_sequence(region) + else + write_status "WARN: Unable to find entry for #{snp.gene}" + end else rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. " end rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil + snp.snp_in = snp_in snp.original_name = original_name if snp.position snps << snp else @@ -204,11 +230,11 @@ #3. Run exonerate on each of the possible chromosomes for the SNP #puts chromosome #chr_group = chromosome[0] write_status "Searching markers in genome" exo_f = File.open(exonerate_file, "w") -contigs_f = File.open(temp_contigs, "w") +contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs] filename=path_to_contigs puts filename target=filename fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target}) @@ -222,33 +248,34 @@ found_contigs.add(aln.target_id) entry = fasta_file.index.region_for_entry(aln.target_id) raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil region = entry.get_full_region seq = fasta_file.fetch_sequence(region) - contigs_f.puts(">#{aln.target_id}\n#{seq}") + contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs] end end end -exo_f.close() -contigs_f.close() +exo_f.close() +contigs_f.close() if options[:extract_found_contigs] #4. Load all the results from exonerate and get the input filename for primer3 #Custom arm selection function that only uses the first two characters. Maybe #we want to make it a bit more cleaver write_status "Reading best alignment on each chromosome" container= Bio::PolyploidTools::ExonContainer.new container.flanking_size=options[:flanking_size] container.gene_models(temp_fasta_query) -container.chromosomes(temp_contigs) +container.chromosomes(fasta_reference) container.add_parental({:name=>snp_in}) container.add_parental({:name=>original_name}) snps.each do |snp| snp.container = container snp.flanking_size = container.flanking_size + snp.variation_free_region = options[:variation_free_region] container.add_snp(snp) end container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity}) @@ -278,9 +305,11 @@ end kasp_container.add_primers_file(primer_3_output) if added_exons > 0 header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors" File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") } + +File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) } write_status "DONE" rescue StandardError => e write_status "ERROR\t#{e.message}" raise e \ No newline at end of file