lib/snp-search.rb in snp-search-0.21.0 vs lib/snp-search.rb in snp-search-0.22.0
- old
+ new
@@ -14,19 +14,10 @@
file_format = :embl
end
return file_format
end
-# A method to populate the strain names in the Strain table. strain_names is an array of strain names.
-def populate_strains(strain_names)
- strain_names.each do |strain|
- s = Strain.new
- s.name = strain
- s.save
- end
-end
-
# A method to populate the database with the features (genes etc) and the annotations from the embl file.
# Every feature is included except those of type 'source' or 'gene', which only repeat information found elsewhere. The 'CDS' feature represents the gene.
# The annotation table also includes the start and end coordinates of the CDS, as well as the strand. The 'locations' method is defined in bioruby under genbank, so 'bio' must be required at the top of the file.
# Also, the qualifier and value are extracted from the embl file and added to the database.
def populate_features_and_annotations(sequence_file)
@@ -54,24 +45,34 @@
# This method populates the rest of the information, i.e. SNP information, Alleles and Genotypes.
# It requires the output (vcf file) from the mpileup SNP identification algorithm. In 0.21.0 the strain names were also passed in as an array; from 0.22.0 they are read from the vcf column headings instead.
-def populate_snps_alleles_genotypes(strain_names, vcf_file, cuttoff_snp, cuttoff_genotype)
- strains = Array.new
- strain_names.each do |strain_name|
- strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
- strains << strain
- end
+def populate_snps_alleles_genotypes(vcf_file, cuttoff_snp, cuttoff_genotype)
+
# open vcf file and parse each line
File.open(vcf_file) do |f|
# header names
header = f.gets
header2 = f.gets.chomp
column_headings = header2.split("\t")
- sample_names = column_headings[9..-1]
-
+ strain_names = column_headings[9..-1]
+ strain_names.map!{|name| name.sub(/\..*/, '')}
+
+ strain_names.each do |str|
+ ss = Strain.new
+ ss.name = str
+ ss.save
+ end
+
+ strains = Array.new
+ strain_names.each do |strain_name|
+ strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
+ strains << strain
+ end
+
+
good_snps = 0
# start parsing snps
while line = f.gets
details = line.split("\t")
ref = details[0]