# This query script finds the unique snps amongs the list of strains provided. # Only use this script once your database has been fully populated. # Usage: ruby example2.rb -d your_db_name.sqlite3 -s list_of_your_species.txt # Output is the number of unique snps in the list of your strains provided in the -s option. # You may use this script to do other SQL queries. Just change the SQL query below with your query. require 'snp_db_models' gem "slop", "~> 2.4.0" require 'slop' opts = Slop.new :help do banner "ruby query.rb [OPTIONS]" on :V, :verbose, 'Enable verbose mode' on :D, :database=, 'The name of the database you like to query', true on :s, :strain=, 'The strains/samples you like to query', true on_empty do puts help end end opts.parse puts "You must supply the -D option, it's a required field" and exit unless opts[:database] puts "You must supply the -s option, it's a required field" and exit unless opts[:strain] begin puts "#{opts[:database]} file does not exist!" and exit unless File.exist?(opts[:database]) rescue end begin puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain]) rescue end establish_connection(opts[:database]) begin strains = [] File.read(opts[:strain]).each_line do |line| strains << line.chop end def find_shared_snps(strain_names) *strain_names = strain_names where_statement = strain_names.collect{|strain_name| "strains.name = '#{strain_name}' OR "}.join("").sub(/ OR $/, "") return Snp.find_by_sql("SELECT * FROM (SELECT features.* from features INNER JOIN snps ON features.id = snps.feature_id INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id INNER JOIN strains ON strains.id = genotypes.strain_id WHERE (#{where_statement}) AND alleles.id <> snps.reference_allele_id AND (SELECT COUNT(*) from snps AS s INNER JOIN alleles ON alleles.snp_id = snps.id INNER JOIN genotypes ON alleles.id = genotypes.allele_id WHERE alleles.id <> snps.reference_allele_id and s.id = snps.id) = #{strain_names.size} GROUP BY snps.id HAVING COUNT(*) = #{strain_names.size})"); end gas_snps = find_shared_snps(strains) gas_snps.each do |snp| puts "The number of unique snps are #{snp.id}" end rescue end