lib/output_information_methods.rb in snp-search-2.10.8 vs lib/output_information_methods.rb in snp-search-2.11.0
- old
+ new
@@ -11,41 +11,40 @@
snps_counter = 0
cds_snps_counter = 0
total_number_of_syn_snps = 0
total_number_of_non_syn_snps = 0
total_number_of_pseudo = 0
- snps.each do |snp|
+ snps.each do |snp|
ActiveRecord::Base.transaction do
snp.alleles.each do |allele|
next if snp.alleles.any?{|allele| allele.base.length > 1} # indel
if allele.id != snp.reference_allele_id
-
+
# get annotation (if there is any) for each SNP
features = Feature.joins(:snps).where("snps.id = ?", snp.id)
# get snp quality for each snp
snp_qual = Snp.find_by_sql("select qual from snps where snps.id = #{snp.id}")
- next if snp_qual.any?{|snps_quality| snps_quality.qual < cuttoff_snp.to_i}
# ignore snp if the snp qual is less than cuttoff.
- # next if snp.snp_qual < cuttoff_snp.to_i
+ next if snp_qual.any?{|snps_quality| snps_quality.qual < cuttoff_snp.to_i}
+
# get all genotype qualities for each snp.
gqs = Genotype.find_by_sql("select geno_qual from genotypes inner join alleles on alleles.id = genotypes.allele_id inner join snps on snps.id = alleles.snp_id where snps.id = #{snp.id}")
# ignore snp if any of its genotype qualities is lower than the cuttoff.
next if gqs.any?{|genotype_quality| genotype_quality.geno_qual < cuttoff_genotype.to_i}
ref_base = Bio::Sequence.auto(Allele.find(snp.reference_allele_id).base)
snp_base = Bio::Sequence.auto(allele.base)
# count snps now: after you have selected the snps with gqs and snp_qual greater than the threshold.
- snps_counter += 1
+ snps_counter += 1
# If the feature is empty then just output basic information about the snp.
if features.empty?
outfile.puts "#{snp.ref_pos}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}"
-
- else
+ else
features.each do |feature|
if feature.name == "CDS"
cds_snps_counter +=1
@@ -89,45 +88,49 @@
small = ["V","C","A","G","D","N","S","T","P"]
non_small = ["I","L","M","F","Y","W","H","K","R","E","Q"]
# Get alleles for each strain
- alleles_array = []
+ bases_from_alleles = []
strains.each do |strain|
+
allele_for_strains = Allele.joins(:genotypes => :strain).where("strains.id = ? AND alleles.snp_id = ?", strain.id, snp.id).first
- alleles_array << allele_for_strains.base
+ # allele_for_strains = Allele.find_by_sql("select * from alleles inner join genotypes on genotypes.allele_id = alleles.id inner join strains on strains.id = genotypes.strain_id where strains.id = #{strain.id} and alleles.snp_id = #{snp.id}")
+ puts allele_for_strains.inspect
+ # next if bases_from_alleles.empty?
+ bases_from_alleles << allele_for_strains.base
end
# If no difference between the amino acids then its synonymous SNP, if different then its non-synonymous.
if original_seq_translated_clean == mutated_seq_translated_clean
total_number_of_syn_snps +=1
if mutated_seq_translated_clean =~ /\*/
total_number_of_pseudo +=1
- outfile.puts "#{snp.ref_pos}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tsynonymous\t#{annotation.value}\tYes\tN/A\tN/A\tN/A\tN/A\tN/A\t#{alleles_array.join("\t") if info}"
+ outfile.puts "#{snp.ref_pos}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tsynonymous\t#{annotation.value}\tYes\tN/A\tN/A\tN/A\tN/A\tN/A\t#{bases_from_alleles.join("\t") if info}"
else
- outfile.puts "#{snp.ref_pos}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tsynonymous\t#{annotation.value}\tNo\tN/A\tN/A\tN/A\tN/A\tN/A\t#{alleles_array.join("\t") if info}"
+ outfile.puts "#{snp.ref_pos}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tsynonymous\t#{annotation.value}\tNo\tN/A\tN/A\tN/A\tN/A\tN/A\t#{bases_from_alleles.join("\t") if info}"
end
else
total_number_of_non_syn_snps +=1
diffs = Diff::LCS.diff(original_seq_translated_clean, mutated_seq_translated_clean)
if mutated_seq_translated_clean =~ /\*/
total_number_of_pseudo +=1
- outfile.puts "#{snp.ref_pos}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tnon-synonymous\t#{annotation.value}\tYes\t#{diffs[0][0].element}\t#{diffs[0][1].element}\t#{'Yes' if (hydrophobic.include? diffs[0][0].element) == (non_hydrophobic.include? diffs[0][1].element)}#{'No' if (hydrophobic.include? diffs[0][0].element) != (non_hydrophobic.include? diffs[0][1].element)}\t#{'Yes' if (polar.include? diffs[0][0].element) == (non_polar.include? diffs[0][1].element)}#{'No' if (polar.include? diffs[0][0].element) != (non_polar.include? diffs[0][1].element)}\t#{'Yes' if (small.include? diffs[0][0].element) == (non_small.include? diffs[0][1].element)}#{'No' if (small.include? diffs[0][0].element) != (non_small.include? diffs[0][1].element)}\t#{alleles_array.join("\t") if info}"
+ outfile.puts "#{snp.ref_pos}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tnon-synonymous\t#{annotation.value}\tYes\t#{diffs[0][0].element}\t#{diffs[0][1].element}\t#{'Yes' if (hydrophobic.include? diffs[0][0].element) == (non_hydrophobic.include? diffs[0][1].element)}#{'No' if (hydrophobic.include? diffs[0][0].element) != (non_hydrophobic.include? diffs[0][1].element)}\t#{'Yes' if (polar.include? diffs[0][0].element) == (non_polar.include? diffs[0][1].element)}#{'No' if (polar.include? diffs[0][0].element) != (non_polar.include? diffs[0][1].element)}\t#{'Yes' if (small.include? diffs[0][0].element) == (non_small.include? diffs[0][1].element)}#{'No' if (small.include? diffs[0][0].element) != (non_small.include? diffs[0][1].element)}\t#{bases_from_alleles.join("\t") if info}"
else
- outfile.puts "#{snp.ref_pos-1}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tnon-synonymous\t#{annotation.value}\tNo\t#{diffs[0][0].element}\t#{diffs[0][1].element}\t#{'Yes' if (hydrophobic.include? diffs[0][0].element) == (non_hydrophobic.include? diffs[0][1].element)}#{'No' if (hydrophobic.include? diffs[0][0].element) != (non_hydrophobic.include? diffs[0][1].element)}\t#{'Yes' if (polar.include? diffs[0][0].element) == (non_polar.include? diffs[0][1].element)}#{'No' if (polar.include? diffs[0][0].element) != (non_polar.include? diffs[0][1].element)}\t#{'Yes' if (small.include? diffs[0][0].element) == (non_small.include? diffs[0][1].element)}#{'No' if (small.include? diffs[0][0].element) != (non_small.include? diffs[0][1].element)}\t#{alleles_array.join("\t") if info}"
+ outfile.puts "#{snp.ref_pos-1}\t#{features.map{|feature| feature.strand == 1} ? "#{ref_base.upcase}" : "#{ref_base.reverse_complement.upcase}"}\t#{features.map{|feature| feature.strand == 1} ? "#{snp_base.upcase}" : "#{snp_base.reverse_complement.upcase}"}\tnon-synonymous\t#{annotation.value}\tNo\t#{diffs[0][0].element}\t#{diffs[0][1].element}\t#{'Yes' if (hydrophobic.include? diffs[0][0].element) == (non_hydrophobic.include? diffs[0][1].element)}#{'No' if (hydrophobic.include? diffs[0][0].element) != (non_hydrophobic.include? diffs[0][1].element)}\t#{'Yes' if (polar.include? diffs[0][0].element) == (non_polar.include? diffs[0][1].element)}#{'No' if (polar.include? diffs[0][0].element) != (non_polar.include? diffs[0][1].element)}\t#{'Yes' if (small.include? diffs[0][0].element) == (non_small.include? diffs[0][1].element)}#{'No' if (small.include? diffs[0][0].element) != (non_small.include? diffs[0][1].element)}\t#{bases_from_alleles.join("\t") if info}"
end
end
end
end
end
end
+ puts "Total SNPs added so far: #{snps_counter}" if snps_counter % 100 == 0
end
end
- puts "Total SNPs added so far: #{snps_counter}" if snps_counter % 100 == 0
end
end
- puts "Total number of snps: #{snps_counter}"
+ puts "Total number of snps: #{snps_counter} with Genotype quality cutoff at #{cuttoff_genotype} and SNP quality cutoff at #{cuttoff_snp}"
puts "Total number of snps in CDS region: #{cds_snps_counter}"
puts "Total number of synonymous SNPs: #{total_number_of_syn_snps}"
puts "Total number of non-synonymous SNPs: #{total_number_of_non_syn_snps}"
puts "Total number of pseudogenes: #{total_number_of_pseudo}"
outfile.puts "Total number of snps: #{snps_counter}"