vendor/bin/mac/vhm.py in big_simon-0.0.1 vs vendor/bin/mac/vhm.py in big_simon-0.1.0
- old
+ new
@@ -44,12 +44,13 @@
## log file ##
filelog = open(os.path.join(tmpDir, 'vhm.log'), 'w')
## name length ##
-nameLen = 93 - len(options.outDir)
+# nameLen = 93 - len(options.outDir)
#### possibly because the kmercount folder name for each contig is too long?
+nameLen = 99999999999999
#################### 0: preparation ############################
## path to the programs
@@ -209,11 +210,11 @@
sys.stdout.write("WARNING: no hostTaxa file provided, creating a dummy one \n")
filelog.write("WARNING: no hostTaxa file provided, creating a dummy one \n")
hostTaxaFileWrite = open(hostTaxaFile, 'w') ## make file blank
hostTaxaFileWrite.close()
hostTaxaFileWrite = open(hostTaxaFile, 'a')
-
+
hostTaxaFileWrite.write("hostNCBIName hostName hostSuperkingdom hostPhylum hostClass hostOrder hostFamily hostGenus hostSpecies\n")
for currentFileName in hostFaList :
if currentFileName.startswith('.') :
continue
if len(currentFileName) > nameLen :
@@ -233,11 +234,11 @@
else :
hostTaxaFile = os.path.join(options.outDir, os.path.basename(options.hostTaxaFile)+"_new.txt")
hostTaxaTable = numpy.genfromtxt(options.hostTaxaFile,delimiter="\t", dtype=str)
hostTaxaTable[hostTaxaTable=='']='unknown'
numpy.savetxt(hostTaxaFile, hostTaxaTable, fmt="%s", delimiter='\t', newline='\n')
-
+
filelog.flush()
#################### 1: count kmer and prepare list files ############################
#sys.stdout.write("Step 1: counting kmers \n")
@@ -257,11 +258,11 @@
currentFileNameS = currentFileName
sys.stdout.write("Step 1: counting kmers for virus " + currentFileNameS + "\n")
filelog.write("Step 1: counting kmers for virus " + currentFileNameS + "\n")
for w in range(1, (kmax+1)) :
currentFilePath = os.path.join(options.virusFaDir, currentFileName)
-
+
currentKmerCountPath = os.path.join(kmerCountPath, currentFileNameS)
cmdKmer = countKmerOut + " -l -k " + str(w) + \
" -i " + currentFilePath +\
" -o " + currentKmerCountPath +\
" -s " + currentFileNameS
@@ -276,13 +277,13 @@
str(2) + "\n")
else :
sys.stderr.write( "ERROR in counting kmers for " + currentFileNameS + "\n")
filelog.write( "ERROR in counting kmers for " + currentFileNameS + "\n")
sys.exit(0)
-
+
filelog.flush()
-
+
end_time = time.time()
count += 1
#sys.stdout.write(str(end_time - start_time) + "s for " + str(count) + " seqs \n")
sys.stdout.write(" (Average time for counting kmers for one virus: " + str(round((end_time - start_time)/count,4)) + "s) \n" )
sys.stdout.write(" (ETR for counting kmers for viruses: " + str(round((end_time - start_time)/count*(len(virusFaList)-count), 4)) + "s) \n")
@@ -327,13 +328,13 @@
str(2) + "\n")
else :
sys.stderr.write( "ERROR in counting kmers for " + currentFileNameS + "\n")
filelog.write( "ERROR in counting kmers for " + currentFileNameS + "\n")
sys.exit(0)
-
+
filelog.flush()
-
+
end_time = time.time()
count += 1
#sys.stdout.write(str(end_time - start_time) + "s for " + str(count) + " seqs \n")
sys.stdout.write(" (Average time for counting kmers for one host: " + str(round((end_time - start_time)/count, 4)) + "s) \n")
sys.stdout.write(" (ETR for counting kmers for hosts: " + str(round((end_time - start_time)/count*(len(hostFaList)-count), 4)) + "s) \n")
@@ -364,10 +365,10 @@
#for c in iter(lambda: cmdCptMeasureOut.stderr.read(1), ''):
for c in iter(cmdCptMeasureOut.stderr.readline, b''):
sys.stdout.write(c.decode("utf-8"))
filelog.write(c.decode("utf-8"))
filelog.flush()
-
+
end_time = time.time()
count += 1
sys.stdout.write(" (Average time for computing dissimilarities for one virus-host pair: " + str(round((end_time - start_time)/count/len(virusFaList), 4)) + "s) \n")
sys.stdout.write(" (ETR for computing dissimilarities for virus-host pairs: " + str(round((end_time - start_time)/count*(len(hostFaList)-count), 4)) + "s) \n")
filelog.write(" (Average time for computing dissimilarities for one virus-host pair: " + str(round((end_time - start_time)/count/len(virusFaList), 4)) + "s) \n")