#!/usr/bin/env ruby

#This This script converts the a file with snps and positions with the header:
#GENE,BASE,POS,SNP,Chromosome
#  snp.gene, snp.original, snp.position, snp.snp, snp.chromosome 
#To the input expected by polymarker
#ID, Chromosome, sequence
#With sequence containing the SNP in the notation "[A/T]"
require 'bio'
require 'optparse'

$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
require path


def log(msg)
  time=Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
  puts "#{time}: #{msg}"
end

markers = nil

options = {}
options[:flanking_size] = 100
test_file=''
OptionParser.new do |opts|
  
  opts.banner = "Usage: snp_postion_to_polymarker.rb [options]"

  opts.on("-s", "--snp_file CSV", "CSV file with the following columnns:\nID,Allele_1,position,Allele_1,target_chromosome") do |o|
    options[:snp_file] = o
    test_file = o 
  end
  opts.on("-r", "--reference FASTA", "reference with the genes/contings/marker seuqnece") do |o|
    options[:reference] = o
  end
  opts.on("-o", "--out CSV", "Output file ") do |o|
    options[:output] = o
  end
  opts.on("-f", "--flanking_size INT", "Flanking size around the SNP") do |o|
    options[:flanking_size] = o.to_i
  end

  opts.on("-t", "--mutant_list FILE", "File with the list of positions with mutation and the mutation line. Example: IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T\n\
    requires --reference to get the sequence using a position") do |o|
    options[:mutant_list] = o
     test_file = o 
  end
  
end.parse!
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"

fasta_reference = options[:reference] if options[:reference]
fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
fasta_reference_db.load_fai_entries

out = $stdout
lastRegion = nil
lastTemplate = nil
out = File.open(options[:output], "w") if options[:output]
File.open(test_file) do | f |
  f.each_line do | line |
    	snp = nil
      entry = nil
      if options[:snp_file]
    	   snp = Bio::PolyploidTools::SNP.parse(line)
         entry = fasta_reference_db.index.region_for_entry(snp.gene)
      elsif options[:mutant_list]
         snp = Bio::PolyploidTools::SNPMutant.parse(line)
         entry = fasta_reference_db.index.region_for_entry(snp.contig)
      end
    	#puts line
    	if entry
       		region = entry.get_full_region
          snp_name = snp.snp_id_in_seq

       		#if region != lastRegion
          #  lastTemplate = fasta_reference_db.fetch_sequence(region)
          #end
          start, total, new_position = snp.to_polymarker_coordinates(options[:flanking_size])
          region.start = start 
          region.end = start + total
          #puts region
          local_template = fasta_reference_db.fetch_sequence(region)

          snp.position = new_position

          snp.template_sequence = local_template
          lastRegion = region

       		out.puts "#{snp.gene}_#{snp_name},#{snp.chromosome},#{snp.to_polymarker_sequence(options[:flanking_size])}"
    	else
    	   $stderr.puts "ERROR: Unable to find entry for #{snp.gene}"
    	end
	end
end

out.close if options[:output]