vendor/bin/mac/vhm.py in big_simon-0.0.1 vs vendor/bin/mac/vhm.py in big_simon-0.1.0

- old
+ new

@@ -44,12 +44,13 @@ ## log file ## filelog = open(os.path.join(tmpDir, 'vhm.log'), 'w') ## name length ## -nameLen = 93 - len(options.outDir) +# nameLen = 93 - len(options.outDir) #### possibly because of the kmercount folder name for each contig is too long? +nameLen = 99999999999999 #################### 0: preparation ############################ ## path to the programs @@ -209,11 +210,11 @@ sys.stdout.write("WARNING: no hostTaxa file provided, creating a dummy one \n") filelog.write("WARNING: no hostTaxa file provided, creating a dummy one \n") hostTaxaFileWrite = open(hostTaxaFile, 'w') ## make file blank hostTaxaFileWrite.close() hostTaxaFileWrite = open(hostTaxaFile, 'a') - + hostTaxaFileWrite.write("hostNCBIName hostName hostSuperkingdom hostPhylum hostClass hostOrder hostFamily hostGenus hostSpecies\n") for currentFileName in hostFaList : if currentFileName.startswith('.') : continue if len(currentFileName) > nameLen : @@ -233,11 +234,11 @@ else : hostTaxaFile = os.path.join(options.outDir, os.path.basename(options.hostTaxaFile)+"_new.txt") hostTaxaTable = numpy.genfromtxt(options.hostTaxaFile,delimiter="\t", dtype=str) hostTaxaTable[hostTaxaTable=='']='unknown' numpy.savetxt(hostTaxaFile, hostTaxaTable, fmt="%s", delimiter='\t', newline='\n') - + filelog.flush() #################### 1: count kmer and prepare list files ############################ #sys.stdout.write("Step 1: counting kmers \n") @@ -257,11 +258,11 @@ currentFileNameS = currentFileName sys.stdout.write("Step 1: counting kmers for virus " + currentFileNameS + "\n") filelog.write("Step 1: counting kmers for virus " + currentFileNameS + "\n") for w in range(1, (kmax+1)) : currentFilePath = os.path.join(options.virusFaDir, currentFileName) - + currentKmerCountPath = os.path.join(kmerCountPath, currentFileNameS) cmdKmer = countKmerOut + " -l -k " + str(w) + \ " -i " + currentFilePath +\ " -o " + currentKmerCountPath +\ " -s " + currentFileNameS @@ -276,13 +277,13 @@ str(2) + "\n") else : sys.stderr.write( "ERROR in counting kmers for " + currentFileNameS + "\n") filelog.write( "ERROR in counting kmers for " + currentFileNameS + "\n") sys.exit(0) - + filelog.flush() - + end_time = time.time() count += 1 #sys.stdout.write(str(end_time - start_time) + "s for " + str(count) + " seqs \n") sys.stdout.write(" (Average time for counting kmers for one virus: " + str(round((end_time - start_time)/count,4)) + "s) \n" ) sys.stdout.write(" (ETR for counting kmers for viruses: " + str(round((end_time - start_time)/count*(len(virusFaList)-count), 4)) + "s) \n") @@ -327,13 +328,13 @@ str(2) + "\n") else : sys.stderr.write( "ERROR in counting kmers for " + currentFileNameS + "\n") filelog.write( "ERROR in counting kmers for " + currentFileNameS + "\n") sys.exit(0) - + filelog.flush() - + end_time = time.time() count += 1 #sys.stdout.write(str(end_time - start_time) + "s for " + str(count) + " seqs \n") sys.stdout.write(" (Average time for counting kmers for one host: " + str(round((end_time - start_time)/count, 4)) + "s) \n") sys.stdout.write(" (ETR for counting kmers for hosts: " + str(round((end_time - start_time)/count*(len(hostFaList)-count), 4)) + "s) \n") @@ -364,10 +365,10 @@ #for c in iter(lambda: cmdCptMeasureOut.stderr.read(1), ''): for c in iter(cmdCptMeasureOut.stderr.readline, b''): sys.stdout.write(c.decode("utf-8")) filelog.write(c.decode("utf-8")) filelog.flush() - + end_time = time.time() count += 1 sys.stdout.write(" (Average time for computing dissimilarities for one virus-host pair: " + str(round((end_time - start_time)/count/len(virusFaList), 4)) + "s) \n") sys.stdout.write(" (ETR for computing dissimilarities for virus-host pairs: " + str(round((end_time - start_time)/count*(len(hostFaList)-count), 4)) + "s) \n") filelog.write(" (Average time for computing dissimilarities for one virus-host pair: " + str(round((end_time - start_time)/count/len(virusFaList), 4)) + "s) \n")