bin/polymarker.rb in bio-polyploid-tools-0.3.1 vs bin/polymarker.rb in bio-polyploid-tools-0.3.3

- old
+ new

@@ -8,19 +8,35 @@ $: << File.expand_path(File.dirname(__FILE__) + '/../lib') $: << File.expand_path('.') path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb') require path +arm_selection_functions = Hash.new; +arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name | + ret = contig_name[0,2] + return ret +end +#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110 +arm_selection_functions[:arm_selection_embl] = lambda do | contig_name| + ret = contig_name.split('_')[2][0,2] + return ret +end +arm_selection_functions[:arm_selection_morex] = lambda do | contig_name | + ret = contig_name.split(':')[0].split("_")[1]; + return ret +end + options = {} options[:path_to_contigs] = "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa" options[:chunks] = 1 options[:bucket_size] = 0 options[:bucket] = 1 options[:model] = "est2genome" +options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ; OptionParser.new do |opts| opts.banner = "Usage: polymarker.rb [options]" opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o| options[:path_to_contigs] = o @@ -43,10 +59,14 @@ end opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o| options[:model] = o end + + opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two", "Function to decide the chromome arm") do |o| + options[:arm_selection] = arm_selection_functions[o.to_sym]; + end end.parse! p options @@ -113,12 +133,18 @@ snp = nil if options[:marker_list] #List with Sequence snp = Bio::PolyploidTools::SNPSequence.parse(line) elsif options[:snp_list] and options[:reference] #List and fasta file snp = Bio::PolyploidTools::SNP.parse(line) - region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region - snp.template_sequence = fasta_reference_db.fetch_sequence(region) + entry = fasta_reference_db.index.region_for_entry(snp.gene) + if entry + region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region + snp.template_sequence = fasta_reference_db.fetch_sequence(region) + else + $stderr.puts "Unable to find entry for #{snp.gene}" + end + else rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. " end rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil snp.snp_in = snp_in @@ -176,20 +202,12 @@ #4. Load all the results from exonerate and get the input filename for primer3 #Custom arm selection function that only uses the first two characters. Maybe #we want to make it a bit more cleaver write_status "Reading best alignment on each chromosome" -arm_selection_first_two = lambda do | contig_name | - ret = contig_name[0,2] - return ret -end -#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110 -arm_selection_embl = lambda do | contig_name| - ret = contig_name.split('_')[2][0,2] - return ret -end + container= Bio::PolyploidTools::ExonContainer.new container.flanking_size=100 container.gene_models(temp_fasta_query) container.chromosomes(temp_contigs) container.add_parental({:name=>snp_in}) @@ -197,10 +215,10 @@ snps.each do |snp| snp.container = container snp.flanking_size = container.flanking_size container.add_snp(snp) end -container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_embl, :min_identity=>min_identity}) +container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity}) #4.1 generating primer3 file write_status "Running primer3" file = File.open(exons_filename, "w") \ No newline at end of file