require 'snp-search'
require 'snp_db_connection'
require 'snp_db_models'
require 'snp_db_schema'
require 'output_information_methods'
require 'activerecord-import'
require 'slop'

# README file: https://github.com/phe-bioinformatics/snp-search/blob/master/README.rdoc'

opts = Slop.parse do

  banner "\nruby snp-search [-create] [-output] [-d <sqlite3>] [options]*"

  separator ''

  on :C, :create, 'Create database'
  on :O, :output, 'Output a process'
  
  separator ''
  
  separator '-create -r reference_file.gbk (or .embl) -v vcf_file.vcf -d db.sqlite3'
  separator 'e.g. snp-search -C -r ecoli.gbk -v ecoli.vcf -d ecoli.sqlite3'
  on :r, :reference_file=, 'Reference genome file, in gbk or embl file format, Required', true
  on :v, :vcf_file=, 'variant call format (vcf) file, Required', true
  on :d, :name_of_database=, 'Name of database, Required'
  on :A, :cuttoff_ad=, 'AD ratio cutoff (default 0.9)', :as => :int, :default => 0.9

  separator ''
 
  separator '-output -all_or_filtered_snps -d db.sqlite3 -T|-F [options]'
  separator 'e.g. snp-search -O -f -F -d ecoli.sqlite3 -R -I phage,insertion,transposon -o ecoli_concatenated_snps_filtered.fasta'
  on :f, :all_or_filtered_snps, 'ignore SNPs from specified features in the database (if you do not want to ignore any SNPs, just use this option with -F/T -o)'
  on :F, :fasta, 'output fasta file format (default)'
  on :T, :tabular, 'output tabular file format'
  on :c, :cuttoff_snp_qual=, 'SNP quality cutoff, (default = 90)', :as => :int, :default => 90
  on :g, :cuttoff_genotype=, 'Genotype quality cutoff (default = 30)', :as => :int,  :default => 30
  on :R, :remove_non_informative_snps, 'Only output informative SNPs.'
  on :e, :ignore_snps_in_range=, 'A list of position ranges to ignore e.g 10..500,2000..2500.'
  on :a, :ignore_strains=, 'A list of strains to ignore (seperate by comma e.g. S1,S4,S8 ).'
  on :I, :ignore_snps_on_annotation=, 'The name of the feature(s) to ignore.  Features should be seperated by comma (e.g. phage,inserstion,transposon)'
  on :o, :out=, 'Name of output file, Required'
  on :t, :tree, 'Generate SNP phylogeny (only used with -fasta option)'
  on :p, :fasttree_path=, 'Full path to the FastTree tool (e.g. /usr/local/bin/FastTree. only used with -tree option)'
  separator ''

  separator '-output -unique_snps -d db.sqlite3 -s strains.txt -o unique_snps.txt [options]'
  separator 'e.g. snp-search -O -u -d ecoli.sqlite3 -s strains_list_for_unique_snps.txt -o ecoli_unique_snps_strains.txt'
  on :u, :unique_snps, 'Query for unique snps in the database'  
  on :c, :cuttoff_snp_qual=, 'SNP quality cutoff, (default = 90)', :as => :int, :default => 90
  on :g, :cuttoff_genotype=, 'Genotype quality cutoff (default = 30)', :as => :int,  :default => 30
  on :s, :strain=, 'The strains/samples you like to query (only used with -unique_snps flag)'
  on :o, :out=, 'Name of output file, Required'

  separator ''

  separator '-output -info -d db.sqlite3 -o info.txt [options]'
  separator ''
  on :i, :info, 'Output various information about SNPs'
  on :c, :cuttoff_snp_qual=, 'SNP quality cutoff, (default = 90)', :as => :int, :default => 90
  on :g, :cuttoff_genotype=, 'Genotype quality cutoff (default = 30)', :as => :int,  :default => 30
  on :o, :out=, 'Name of output file, Required'
end

###########################################################

# CREATING A DATABASE
if opts[:create] 

  # raise "Please provide a database file name" if opts[:reference_file].empty?
  # puts opts[:cuttoff_snp_qual].to_i
  
    error_msg = ""

    error_msg += "-d: \t Name of your database\n" unless opts[:name_of_database]
    error_msg += "-r: \t Reference genome file, in gbk or embl file format\n" unless opts[:reference_file]
    error_msg += "-v: \t .vcf file\n" unless opts[:vcf_file]

    # error_msg_optional = ""

    # error_msg_optional += "-c: \tSNP quality cutoff, (default = 90)\n" 
    # error_msg_optional += "-g: \tGenotype quality cutoff (default = 30)\n" 

      unless error_msg == "" 
        puts "Please provide the following required fields:"
        puts error_msg
        # puts "Optional fields:"
        # puts error_msg_optional

        # puts "Please provide a database file name" if opts[:reference_file].empty?
        # puts opts.help unless opts.empty?
        exit
      end       
     
    abort "#{opts[:reference_file]} file does not exist!" unless File.exist?(opts[:reference_file])
    
    abort "#{opts[:vcf_file]} file does not exist!" unless File.exist?(opts[:vcf_file])


  # Name of your database 
  establish_connection(opts[:name_of_database])

  # Schema will run here
  db_schema

  ref = opts[:reference_file]

  sequence_format = guess_sequence_format(ref)

        case sequence_format
        when :genbank
          sequence_flatfile = Bio::FlatFile.open(Bio::GenBank,opts[:reference_file]).next_entry
        when :embl
          sequence_flatfile = Bio::FlatFile.open(Bio::EMBL,opts[:reference_file]).next_entry
        else
          puts "All sequence files should be in genbank or embl format"
          exit
        end

  # The populate_features_and_annotations method populates the features and annotations.  It uses the embl/gbk file.
  populate_features_and_annotations(sequence_flatfile)

  #The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes.  It uses the vcf file, and if specified, the SNP quality cutoff and genotype quality cutoff

  populate_snps_alleles_genotypes(opts[:vcf_file], opts[:cuttoff_ad])

###########################################################

# QUERYING THE DATABASE
elsif opts[:output]

  error_msg = ""
  error_msg += "-f: \t SNPs from specified features in the database OR\n-u: \t Query for unique snps in the database OR\n-i: \t Information on all SNPs\n" unless opts[:all_or_filtered_snps] || opts[:unique_snps] || opts[:info]  

  unless error_msg == "" 
    puts "Please provide the following required fields:"
    puts error_msg
    # puts opts.help unless opts.empty?
    exit
  end

  if opts[:all_or_filtered_snps]

    error_msg = ""

    error_msg += "-d: \t Name of your database\n" unless opts[:name_of_database]
    error_msg += "-o: \t name of your output file\n" unless opts[:out]
    error_msg += "-F: \t Fasta output OR\n-T: \t Tabular output" unless opts[:fasta] || opts[:tabular]
          
    error_msg_optional = ""

    error_msg_optional += "-I,\t --ignore_snps_on_annotation: The name of the feature(s) to ignore.  Features should be seperated by comma (e.g. phages,inserstion,transposons)\n" unless opts[:ignore_snps_on_annotation]
    error_msg_optional += "-a,\t --ignore_strains: A list of strains to ignore\n" unless  opts[:ignore_strains]
    error_msg_optional += "-e,\t --ignore_snps_in_range: A list of position ranges to ignore e.g 10..500,2000..2500\n" unless  opts[:ignore_snps_in_range]
    error_msg_optional += "-c,\t --cuttoff_snp_qual: cuttoff for SNP Quality\n"  unless  opts[:cuttoff_snp_qual]
    error_msg_optional += "-g,\t --cuttoff_genotype: cuttoff for Genotype Quality\n"  unless  opts[:cuttoff_genotype]
    error_msg_optional += "-R,\t --remove_non_informative_snps: Only output informative SNPs\n"  unless  opts[:remove_non_informative_snps]
    error_msg_optional += "-t,\t --tree: Construct tree from output\n" unless  opts[:tree]

    unless error_msg == "" 
      puts "Please provide the following required fields:"
      puts error_msg
      puts "Optional fields:"
      puts error_msg_optional
      # Added this here as it wont appear here in error_msg_optional as its set as default.
      puts "-c,\t --cuttoff_snp_qual: cuttoff for SNP Quality (default 90)\n"
      puts "-g,\t --cuttoff_genotype: cuttoff for Genotype Quality (default 30)\n"
      # puts opts.help unless opts.empty?
      exit
    end

    abort "#{opts[:name_of_database]} database does not exist!" unless File.exist?(opts[:name_of_database])

    establish_connection(opts[:name_of_database])

    get_snps(opts[:out], opts[:ignore_snps_on_annotation], opts[:ignore_snps_in_range], opts[:ignore_strains], opts[:remove_non_informative_snps], opts[:fasta], opts[:tabular], opts[:cuttoff_genotype], opts[:cuttoff_snp_qual], opts[:tree], opts[:fasttree_path])
  end

####################################################################################################
  #FIND UNIQUE SNPS
  if opts[:unique_snps]

    error_msg = ""

    error_msg += "-d: \t Name of your database\n" unless opts[:name_of_database]
    error_msg += "-s: \t List of strains you like to query\n" unless opts[:strain]
    error_msg += "-o: \t Name of the output file\n" unless opts[:out]

    unless error_msg == "" 
      puts "Please provide the following required fields:"
      puts error_msg
      puts "Optional fields:"
      # Added this here as it wont appear here in error_msg_optional as its set as default.
      puts "-c,\t --cuttoff_snp_qual: cuttoff for SNP Quality (default 90)\n"
      puts "-g,\t --cuttoff_genotype: cuttoff for Genotype Quality (default 30)\n"
      # puts opts.help unless opts.empty?
      exit
    end
     
    abort "#{opts[:name_of_database]} database does not exist!" unless File.exist?(opts[:name_of_database])
    abort "#{opts[:strain]} file does not exist!" unless File.exist?(opts[:strain])

    establish_connection(opts[:name_of_database])

    strains = []
      File.read(opts[:strain]).each_line do |line| 
        strains << line.chop
      end
    # find_unique_snps defined in bin/snp-search.rb 
    find_unqiue_snps(strains, opts[:out], opts[:cuttoff_genotype], opts[:cuttoff_snp_qual])
  end
  
##############################################################
  if opts[:info]

    error_msg = ""

    error_msg += "-d: \t the name of your database\n" unless opts[:name_of_database]
    error_msg += "-o: \t name of your output file (in tab-delimited format)\n" unless opts[:out]

    unless error_msg == "" 
      puts "Please provide the following required fields:"
      puts error_msg
      puts "Optional fields:"
      # Added this here as it wont appear here in error_msg_optional as its set as default.
      puts "-c,\t --cuttoff_snp_qual: cuttoff for SNP Quality (default 90)\n"
      puts "-g,\t --cuttoff_genotype: cuttoff for Genotype Quality (default 30)\n"
      # puts opts.help unless opts.empty?
      exit
    end

    abort "#{opts[:name_of_database]} database does not exist!" unless File.exist?(opts[:name_of_database])

    establish_connection(opts[:name_of_database])

    #information defined in bin/snp-search.rb
    information(opts[:out], opts[:cuttoff_genotype], opts[:cuttoff_snp_qual])
    
  end
  
else
   puts opts.help
end