#!/usr/bin/env ruby
require 'bio'
require 'rubygems'
require 'pathname'
require 'bio-samtools'
require 'optparse'
require 'set'
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
require path


options = {}
options[:path_to_contigs] = "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa"
options[:chunks] = 1
options[:bucket_size] = 0
options[:bucket] = 1
options[:model] = "est2genome"
OptionParser.new do |opts|
  opts.banner = "Usage: polymarker.rb [options]"

  opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
    options[:path_to_contigs] = o
  end
  
  opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
    options[:marker_list] = o
  end
  
  opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
    options[:snp_list] = o
  end
  
  opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") do |o|
    options[:reference] = o
  end
  
  opts.on("-o", "--output FOLDER", "Output folder") do |o|
    options[:output_folder] = o
  end
  
  opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") do |o|
     options[:model] = o
   end
  
    
end.parse!

p options
p ARGV


#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result. 
#TODO: Make all this parameters

path_to_contigs=options[:path_to_contigs]

snp_in="A"
original_name="B"
fasta_reference = nil
#test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
test_file=options[:marker_list]
test_file=options[:snp_list] if options[:snp_list]
fasta_reference = options[:reference]
output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}" 
output_folder= options[:output_folder] if  options[:output_folder]
Dir.mkdir(output_folder)
#TODO Make this tmp files
temp_fasta_query="#{output_folder}/to_align.fa"
temp_contigs="#{output_folder}/contigs_tmp.fa"
exonerate_file="#{output_folder}/exonerate_tmp.tab"
primer_3_input="#{output_folder}/primer_3_input_temp"
primer_3_output="#{output_folder}/primer_3_output_temp"
exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
output_primers="#{output_folder}/primers.csv"

primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model=options[:model] 


min_identity= 90
snps = Array.new

#0. Load the fasta index 
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{fasta_reference}"
end


#1. Read all the SNP files 
#All the SNPs should be on the same chromosome as the first SNP. 
#chromosome = nil
File.open(test_file) do | f |
  f.each_line do | line |
    # p line.chomp!
    snp = nil
    if options[:marker_list] #List with Sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)  
    elsif options[:snp_list] and options[:reference] #List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    else
      rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. " 
    end
    rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp
#    chromosome = snp.chromosome unless chromosome
  #  raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
  end
end

#1.1 Close fasta file
#fasta_reference_db.close() if fasta_reference_db
#2. Generate all the fasta files

written_seqs = Set.new
file = File.open(temp_fasta_query, "w")
snps.each do |snp|
  unless written_seqs.include?(snp.gene)
    written_seqs << snp.gene 
    file.puts snp.to_fasta
  end
end
file.close

#3. Run exonerate on each of the possible chromosomes for the SNP
#puts chromosome
#chr_group = chromosome[0]
exo_f = File.open(exonerate_file, "w")
contigs_f = File.open(temp_contigs, "w")
filename=path_to_contigs 
puts filename
target=filename

fasta_file = Bio::DB::Fasta::FastaFile.new(target)
fasta_file.load_fai_entries

found_cointigs = Set.new
Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
  if aln.identity > min_identity
    exo_f.puts aln.line
    unless found_cointigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file. 
      found_cointigs.add(aln.target_id)
      entry = fasta_file.index.region_for_entry(aln.target_id)
      raise ExonerateException.new,  "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
      region = entry.get_full_region
      seq = fasta_file.fetch_sequence(region)
      contigs_f.puts(">#{aln.target_id}\n#{seq}")
    end
  end  
end
 
exo_f.close()
contigs_f.close()

#4. Load all the results from exonerate and get the input filename for primer3
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more cleaver
arm_selection_first_two = lambda do | contig_name |
  ret = contig_name[0,2]       
  return ret
end
#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110
arm_selection_embl = lambda do | contig_name|
  ret = contig_name.split('_')[2][0,2]
  return ret
end

container= Bio::PolyploidTools::ExonContainer.new
container.flanking_size=100
container.gene_models(temp_fasta_query)
container.chromosomes(temp_contigs)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  container.add_snp(snp)
end
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_embl, :min_identity=>min_identity})

file = File.open(exons_filename, "w")
container.print_fasta_snp_exones(file)
file.close

file = File.open(primer_3_input, "w")
file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
file.puts("PRIMER_MAX_SIZE=25")
file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
file.puts("PRIMER_LIBERAL_BASE=1")
file.puts("PRIMER_NUM_RETURN=5")
file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
container.print_primer_3_exons(file, nil, snp_in)
file.close

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#5. Pick the best primer and make the primer3 output
kasp_container=Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1=snp_in
kasp_container.line_2=original_name

snps.each do |snp|
  kasp_container.add_snp(snp) 
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }