lib/scripts/extract_ECF.py in ecf_classify-1.0.1 vs lib/scripts/extract_ECF.py in ecf_classify-1.0.2
- old
+ new
@@ -33,48 +33,48 @@
with open(args.pfam, 'r') as reader:
for row in reader:
if not row.startswith('#'):
row = row.strip().split(' ')
row=list(filter(None, row))
- if row[0] == 'Sigma70_r2':
- if not hmmscan_cons[row[3]][0]:
- hmmscan_cons[row[3]][0] = (row[17],row[18])
- elif (row[0] == 'Sigma70_r4' or row[0] == 'Sigma70_r4_2'):
- if not hmmscan_cons[row[3]][1]:
- hmmscan_cons[row[3]][1] = (row[17],row[18])
+ if row[3] == 'Sigma70_r2':
+ if not hmmscan_cons[row[0]][0]:
+ hmmscan_cons[row[0]][0] = (row[17],row[18])
+ elif (row[3] == 'Sigma70_r4' or row[3] == 'Sigma70_r4_2'):
+ if not hmmscan_cons[row[0]][1]:
+ hmmscan_cons[row[0]][1] = (row[17],row[18])
#presence of sigma3
hmmscan_s3 = {protein:False for protein in proteins}
with open(args.sigma3, 'r') as reader:
for row in reader:
if not row.startswith('#'):
row = row.strip().split(' ')
row=list(filter(None, row))
- if row[0] == 'Sigma70_r3':
- hmmscan_s3[row[3]] = True
+ if row[3] == 'Sigma70_r3':
+ hmmscan_s3[row[0]] = True
#Score of the general HMM
positives = {protein:0 for protein in proteins}
with open(args.general, 'r') as reader:
for row in reader:
if not row.startswith('#'):
row = row.strip().split(' ')
row=list(filter(None, row))
- positives[row[3]] = float(row[7])
+ positives[row[0]] = float(row[7])
# Extract positions of sigma2 and sigma4
hmmscan = {protein:{'Sigma2': [0,0,10], 'Sigma4': [0,0,10]} for protein in proteins}
with open(args.pfam, 'r') as reader:
for row in reader:
if not row.startswith('#'):
row = row.strip().split(' ')
row=list(filter(None, row))
- if row[0] == 'Sigma70_r2' and float(row[11]) < hmmscan[row[3]]['Sigma2'][2]:
- hmmscan[row[3]]['Sigma2'] = [int(row[19]), int(row[20]), float(row[11])]
- elif (row[0] == 'Sigma70_r4' or row[0] == 'Sigma70_r4_2')\
- and float(row[11]) < hmmscan[row[3]]['Sigma4'][2]:
- hmmscan[row[3]]['Sigma4'] = [int(row[19]), int(row[20]), float(row[11])]
+ if row[3] == 'Sigma70_r2' and float(row[11]) < hmmscan[row[0]]['Sigma2'][2]:
+ hmmscan[row[0]]['Sigma2'] = [int(row[19]), int(row[20]), float(row[11])]
+ elif (row[3] == 'Sigma70_r4' or row[3] == 'Sigma70_r4_2')\
+ and float(row[11]) < hmmscan[row[0]]['Sigma4'][2]:
+ hmmscan[row[0]]['Sigma4'] = [int(row[19]), int(row[20]), float(row[11])]
ecfs = []
writer = sys.stdout
writer.write('\t'.join(['ID', 'sigma3?','sigma2?', 'sigma4?',
'Distance sigma2-sigma4 (aa)','sigma2_start','sigma2_end','sigma4_start', 'sigma4_end', 'Score general ECF HMM','Type'])+'\n')
@@ -103,21 +103,22 @@
writer.write('-\t-\t')
#Check general model score
writer.write(str(positives[p])+'\t')
#make decision
- if hmmscan_s3[p]:
- writer.write('Non-ECF\n')
- elif hmmscan_cons[p][0] and hmmscan_cons[p][1]:
+ if hmmscan_cons[p][0] and hmmscan_cons[p][1]:
if (hmmscan[p]['Sigma4'][0]-(hmmscan[p]['Sigma2'][1]+1)) >=50:
writer.write('Non-ECF\n')
elif positives[p] >= 60.8:
writer.write('ECF\n')
ecfs.append(p)
else:
writer.write('Non-ECF\n')
elif hmmscan_cons[p][0] or hmmscan_cons[p][1]:
- writer.write('ECF-like\n')
+ if hmmscan_s3[p]:
+ writer.write('Non-ECF\n')
+ else:
+ writer.write('ECF-like\n')
else:
writer.write('Non-ECF\n')