# This query script removes the 'phage' genes from the database.
# Only use this script once your database has been fully populated.
# Usage: ruby example1.rb -d your_db_name.sqlite3 -s list_of_your_species.txt -o output.fasta 
# You may use this script to do other SQL queries that result in a fasta output.  Just change the 'snps' SQL query below with your query.
require 'snp_db_models'
gem "slop", "~> 2.4.0"
require 'slop'

opts = Slop.new :help do
  banner "ruby query.rb [OPTIONS]"

  on :V, :verbose, 'Enable verbose mode'
  on :d, :database=, 'The name of the database you like to query', true
  on :o, :outfile=, 'output file, in fasta format', true  
  on :s, :strain=, 'The strains/samples you like to query', true
  on :a, :annotation=, 'The gene you like to remove from analysis', true
     
  on_empty do
    puts help
  end
end
opts.parse

  puts "You must supply the -s option, it's a required field" and exit unless opts[:strain] 
  puts "You must supply the -d option, it's a required field" and exit unless opts[:database] 

begin 
	puts "#{opts[:database]} file does not exist!" and exit unless File.exist?(opts[:database])
rescue
end
 
begin
	puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain])
rescue
end

annotation = opts[:annotation]
establish_connection(opts[:database])
   
begin
strains = []
  File.read(opts[:strain]).each_line do |line| 
    strains << line.chop
  end
 
# Enter the name of your database 

outfile = File.open(opts[:outfile], "w")


# create a sequence hash
sequence_hash = Hash.new

# create an array of strains

# hash key is strain_name, loop through strain_names
# create an empty array
strains.each do |strain_name|
	sequence_hash[strain_name] = Array.new
end

snps = Snp.find_by_sql("SELECT snps.* FROM snps 
          INNER JOIN features 
          ON features.id = snps.feature_id 
          WHERE features.id IN 
            (select features.id from features 
            WHERE id NOT IN 
              (select distinct features.id FROM features 
              INNER JOIN annotations ON 
              annotations.feature_id = features.id 
              WHERE annotations.value LIKE '%(#{annotation})%'))") 


#puts snps.size
puts "Your Query is submitted and is being processed......."
snps.each do |snp|
	#break if i == 100
	snp.alleles.each do |allele|
		allele.genotypes.each do |genotype|
			#	puts genotype.inspect
			sequence_hash[genotype.strain.name] << allele.base
		end
	end
end
	
strains.each do |sn|
	outfile.print ">#{sn}\n" , sequence_hash[sn].join("")
	outfile.puts
end

rescue
end