# This query script removes the 'phage' genes from the database. # Only use this script once your database has been fully populated. # Usage: ruby example1.rb -d your_db_name.sqlite3 -s list_of_your_species.txt -o output.fasta # You may use this script to do other SQL queries that result in a fasta output. Just change the 'snps' SQL query below with your query. require 'snp_db_models' gem "slop", "~> 2.4.0" require 'slop' opts = Slop.new :help do banner "ruby query.rb [OPTIONS]" on :V, :verbose, 'Enable verbose mode' on :d, :database=, 'The name of the database you like to query', true on :o, :outfile=, 'output file, in fasta format', true on :s, :strain=, 'The strains/samples you like to query', true on :a, :annotation=, 'The gene you like to remove from analysis', true on_empty do puts help end end opts.parse puts "You must supply the -s option, it's a required field" and exit unless opts[:strain] puts "You must supply the -d option, it's a required field" and exit unless opts[:database] begin puts "#{opts[:database]} file does not exist!" and exit unless File.exist?(opts[:database]) rescue end begin puts "#{opts[:strain]} file does not exist!" and exit unless File.exist?(opts[:strain]) rescue end annotation = opts[:annotation] establish_connection(opts[:database]) begin strains = [] File.read(opts[:strain]).each_line do |line| strains << line.chop end # Enter the name of your database outfile = File.open(opts[:outfile], "w") # create a sequence hash sequence_hash = Hash.new # create an array of strains # hash key is strain_name, loop through strain_names # create an empty array strains.each do |strain_name| sequence_hash[strain_name] = Array.new end snps = Snp.find_by_sql("SELECT snps.* FROM snps INNER JOIN features ON features.id = snps.feature_id WHERE features.id IN (select features.id from features WHERE id NOT IN (select distinct features.id FROM features INNER JOIN annotations ON annotations.feature_id = features.id WHERE annotations.value LIKE '%(#{annotation})%'))") #puts snps.size puts "Your Query is submitted and is being processed......." snps.each do |snp| #break if i == 100 snp.alleles.each do |allele| allele.genotypes.each do |genotype| # puts genotype.inspect sequence_hash[genotype.strain.name] << allele.base end end end strains.each do |sn| outfile.print ">#{sn}\n" , sequence_hash[sn].join("") outfile.puts end rescue end