bin/polymarker.rb in bio-polyploid-tools-0.3.1 vs bin/polymarker.rb in bio-polyploid-tools-0.3.3
- old
+ new
@@ -8,19 +8,35 @@
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
require path
+arm_selection_functions = Hash.new;
+arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
+ ret = contig_name[0,2]
+ return ret
+end
+#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110
+arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
+ ret = contig_name.split('_')[2][0,2]
+ return ret
+end
+arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
+ ret = contig_name.split(':')[0].split("_")[1];
+ return ret
+end
+
options = {}
options[:path_to_contigs] = "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa"
options[:chunks] = 1
options[:bucket_size] = 0
options[:bucket] = 1
options[:model] = "est2genome"
+options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ;
OptionParser.new do |opts|
opts.banner = "Usage: polymarker.rb [options]"
opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
options[:path_to_contigs] = o
@@ -43,10 +59,14 @@
end
opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
options[:model] = o
end
+
+ opts.on("-a", "--arm_selection arm_selection_embl|arm_selection_morex|arm_selection_first_two", "Function to decide the chromome arm") do |o|
+ options[:arm_selection] = arm_selection_functions[o.to_sym];
+ end
end.parse!
p options
@@ -113,12 +133,18 @@
snp = nil
if options[:marker_list] #List with Sequence
snp = Bio::PolyploidTools::SNPSequence.parse(line)
elsif options[:snp_list] and options[:reference] #List and fasta file
snp = Bio::PolyploidTools::SNP.parse(line)
- region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
- snp.template_sequence = fasta_reference_db.fetch_sequence(region)
+ entry = fasta_reference_db.index.region_for_entry(snp.gene)
+ if entry
+ region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
+ snp.template_sequence = fasta_reference_db.fetch_sequence(region)
+ else
+ $stderr.puts "Unable to find entry for #{snp.gene}"
+ end
+
else
rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
end
rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
snp.snp_in = snp_in
@@ -176,20 +202,12 @@
#4. Load all the results from exonerate and get the input filename for primer3
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more cleaver
write_status "Reading best alignment on each chromosome"
-arm_selection_first_two = lambda do | contig_name |
- ret = contig_name[0,2]
- return ret
-end
-#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110
-arm_selection_embl = lambda do | contig_name|
- ret = contig_name.split('_')[2][0,2]
- return ret
-end
+
container= Bio::PolyploidTools::ExonContainer.new
container.flanking_size=100
container.gene_models(temp_fasta_query)
container.chromosomes(temp_contigs)
container.add_parental({:name=>snp_in})
@@ -197,10 +215,10 @@
snps.each do |snp|
snp.container = container
snp.flanking_size = container.flanking_size
container.add_snp(snp)
end
-container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_embl, :min_identity=>min_identity})
+container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity})
#4.1 generating primer3 file
write_status "Running primer3"
file = File.open(exons_filename, "w")
\ No newline at end of file