# Command-line entry point for snp-search.
#
# Parses the command-line options, validates the required input files and then
# fills the database named by -n: the schema is created, followed by the
# strains, the features/annotations of the reference sequence and finally the
# SNPs, alleles and genotypes read from the .vcf file.
#
# Validation messages are printed to stdout; any failure terminates the script
# with exit status 1.
require 'snp-search'
require 'snp_db_connection'
require 'snp_db_models'
require 'snp_db_schema'

gem "slop", "~> 2.4.0"
require 'slop'

opts = Slop.new :help do
  banner "ruby snp-search [OPTIONS]"
  on :V, :verbose, 'Enable verbose mode'
  on :n, :name=, 'Name of database, (default: snp_db.sqlite3)', :default => 'snp_db.sqlite3'
  on :r, :reference_file=, 'Reference genome file, in gbk or embl file format, Required', true
  on :v, :vcf_file=, '.vcf file, Required', true
  on :s, :strain=, 'text file with a list of strains/samples, Required', true
  on :c, :cuttoff_snp=, 'SNP quality cutoff, (default = 90)', :default => 90
  on :t, :cuttoff_genotype=, 'Genotype quality cutoff (default = 30)', :default => 30

  on_empty do
    puts help
  end
end
opts.parse

# Required options paired with the switch named in the error message.
# An array of pairs (not a hash) keeps the check order stable on old Rubies.
required_options = [
  [:reference_file, '-r'],
  [:vcf_file, '-v'],
  [:strain, '-s']
]

# NOTE: the previous `puts "..." and exit unless ...` form never exited,
# because `puts` returns nil and `nil and exit` short-circuits. The checks
# below use explicit statements so the script really stops.
required_options.each do |option, switch|
  next if opts[option]
  puts "You must supply the #{switch} option, it's a required field"
  exit 1
end

required_options.each do |option, _switch|
  path = opts[option]
  next if File.exist?(path)
  puts "#{path} file does not exist!"
  exit 1
end

begin
  # One strain/sample name per line. `chomp` only strips the line terminator,
  # so the last name survives when the file has no trailing newline
  # (`chop` would cut off its final character).
  strains = File.readlines(opts[:strain]).map { |line| line.chomp }

  # Enter the name of your database
  establish_connection(opts[:name])

  # Schema will run here
  db_schema

  reference_path = opts[:reference_file]
  sequence_class =
    case guess_sequence_format(reference_path)
    when :genbank then Bio::GenBank
    when :embl then Bio::EMBL
    end

  unless sequence_class
    puts "All sequence files should be of genbank or embl format"
    exit 1
  end

  # Only the first entry of the reference file is used.
  sequence_flatfile = Bio::FlatFile.open(sequence_class, reference_path).next_entry

  # path for vcf file here
  vcf_mpileup_file = opts[:vcf_file]

  # The populate_strains method populates the strains in the db. It uses the strain names in array.
  populate_strains(strains)

  # The populate_features_and_annotations method populates the features and annotations. It uses the embl/gbk file.
  populate_features_and_annotations(sequence_flatfile)

  # The populate_snps_alleles_genotypes method populates the snps, alleles and genotypes.
  # It uses the strain names (array), the vcf file and the two quality cutoffs.
  populate_snps_alleles_genotypes(
    strains,
    vcf_mpileup_file,
    opts[:cuttoff_snp].to_i,
    opts[:cuttoff_genotype].to_i
  )
rescue StandardError => e
  # Report the failure instead of swallowing it silently; `exit` above raises
  # SystemExit, which is not a StandardError and therefore passes through.
  warn "snp-search failed: #{e.class}: #{e.message}"
  exit 1
end