# This method performs several queries to ignore elements of the data for fasta or tabular output. # Its is called in lib/snp-search.rb require 'output_information_methods' def get_snps(out, ignore_snps_on_annotation, ignore_snps_in_range, ignore_strains, remove_non_informative_snps, fasta_output, tabular_output, cutoff_genotype, cutoff_snp, tree, fasttree_path) strains = Strain.all sequence_hash = Hash.new sequence_hash["ref"] = Array.new strains.each do |strain| sequence_hash[strain.name] = Array.new end snps_array = Array.new snp_positions = Array.new # output opened for data input output = File.open(out, "w") tab_delim_file_name = File.basename(out, File.extname(out)) + "_snps.tsv" tab_delim_file = File.open(tab_delim_file_name, "w") position_map_file_name = File.basename(out, File.extname(out)) + "_snps_positions.txt" position_map_file = File.open(position_map_file_name, "w") snps_within_features_with_annotation = "" # Perform query # puts ignore_snps_on_annotation.inspect if ignore_snps_on_annotation annotations_where = ignore_snps_on_annotation.split(",").map{|annotation| "annotations.value LIKE '%#{annotation}%'"}.join(" OR ") features_with_annotation = Feature.joins(:annotations).where(annotations_where) snps_within_features_with_annotation = Snp.joins(:features).where("features.id IN (?)", features_with_annotation.collect{|feature| feature.id}) end if snps_within_features_with_annotation.empty? snps = Snp.all else snps = Snp.where("snps.id NOT IN (?)", snps_within_features_with_annotation.collect{|snp| snp.id}) end positions_to_ignore = Array.new if ignore_snps_in_range range_strings = ignore_snps_in_range.split(",") range_strings.each do |range| start_position, end_position = range.split("..") positions_to_ignore += (start_position.to_i..end_position.to_i).to_a end end if ignore_strains strains_to_ignore = ignore_strains.split(",") end i = 0 puts "Your Query is submitted and is being processed......." strains = Strain.find(:all) if ignore_strains strains_to_ignore = ignore_strains.split(",") strains.reject!{|strain| strains_to_ignore.include?(strain.name)} end snps.each do |snp| ActiveRecord::Base.transaction do i += 1 next if positions_to_ignore.include?(snp.ref_pos) # Ignore positions that user specified alleles = snp.alleles genotypes = snp.alleles.collect{|allele| allele.genotypes}.flatten snp_qual = Snp.find_by_sql("select qual from snps where snps.id = #{snp.id}") # ignore snp if the snp qual is less than cutoff. next if snp_qual.any?{|snps_quality| snps_quality.qual < cutoff_snp.to_i} next if alleles.any?{|allele| allele.base.length > 1} # indel next unless genotypes.all?{|genotype| genotype.geno_qual >= cutoff_genotype.to_f} # all geno quals > cutoff # puts "#{i} SNPs processed so far" if i % 100 == 0 strain_alleles = Hash.new strains.each do |strain| strain_genotype = genotypes.select{|genotype| genotype.strain_id == strain.id}.first # next if strain_genotype == nil strain_allele = alleles.select{|allele| allele.id == strain_genotype.allele_id}.first strain_alleles[strain.name] = strain_allele.base end if remove_non_informative_snps next if strain_alleles.values.uniq.size == 1 # remove non-informative SNPs end snp_positions << snp.ref_pos snps_array << snp strain_alleles.each do |strain_name, allele_base| sequence_hash[strain_name] << allele_base end sequence_hash["ref"] << snp.reference_allele.base end end # If user has specified a tabular output if tabular_output output_information_methods(snps_array, output, cutoff_genotype, cutoff_snp, true) # If user has specified a fasta output elsif fasta_output # generate FASTA file output.puts ">ref\n#{sequence_hash["ref"].join("")}" tab_delim_file.puts "\t#{snp_positions.join("\t")}" tab_delim_file.puts "ref\t#{sequence_hash["ref"].join("\t")}" strains.each do |strain| output.puts ">#{strain.name}\n#{sequence_hash[strain.name].join("")}" tab_delim_file.puts "#{strain.name}\t#{sequence_hash[strain.name].join("\t")}" end snp_positions.each_with_index do |snp_position, index| position_map_file.puts "#{index+1} => #{snp_position}" end end # If user has chosen a newick output. if tree nwk_out_file_name = File.basename(out, File.extname(out)) + ".nwk" puts "running phylogeny" `#{fasttree_path} -fastest -nt #{output} > #{nwk_out_file_name}` end output.close tab_delim_file.close end