snp-search.rb in snp-search-0.22.0

- old
+ new

@@ -14,19 +14,10 @@
     file_format = :embl
   end
   return file_format
 end
 
-# A method to populate the strain names in the Strain table.  strain_names is an array of strain names.
-def populate_strains(strain_names)
-	strain_names.each do |strain|
-	s = Strain.new
-	s.name = strain
-	s.save
-	end
-end
-
 # A method to populate the database with the features (genes etc.) and the annotations from the EMBL file.
 # We include all features except 'source' and 'gene', as those two carry repetitive info.  'CDS' is the gene.
 # The annotation table also includes the start and end coordinates of the CDS.  The strand is also included.  The 'locations' method is defined in BioRuby under GenBank, so 'bio' must be required at the top.
 # Also, the qualifier and value are extracted from the EMBL file and added to the database.
 def populate_features_and_annotations(sequence_file)
@@ -54,24 +45,34 @@
 
 
 # This method populates the rest of the information, i.e. SNP information, Alleles and Genotypes.
 # It requires the strain_names as an array and the output (VCF file) from the mpileup-snp identification algorithm.
 
-def populate_snps_alleles_genotypes(strain_names, vcf_file, cuttoff_snp, cuttoff_genotype)
-	strains = Array.new
- 	strain_names.each do |strain_name|
-  		strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
-  		strains << strain
-	end
+def populate_snps_alleles_genotypes(vcf_file, cuttoff_snp, cuttoff_genotype)
+	
 
 # open vcf file and parse each line
 	File.open(vcf_file) do |f|
 	  # header names
 	  header = f.gets
 	  header2 = f.gets.chomp
 	  column_headings = header2.split("\t")
-	  sample_names = column_headings[9..-1]
-	 
+	  strain_names = column_headings[9..-1]
+	 strain_names.map!{|name| name.sub(/\..*/, '')}
+	
+	strain_names.each do |str|
+		ss = Strain.new
+		ss.name = str
+		ss.save
+	end
+	
+	strains = Array.new
+ 	strain_names.each do |strain_name|
+   		strain = Strain.find_by_name(strain_name) # equivalent to Strain.find.where("strains.name=?", strain_name).first
+   		strains << strain
+	 end
+
+	  
 	  good_snps = 0
 	  # start parsing snps
 		while line = f.gets
 		    details = line.split("\t")
 		    ref = details[0]