-
Notifications
You must be signed in to change notification settings - Fork 0
/
plumed_residues.py
228 lines (199 loc) · 11.4 KB
/
plumed_residues.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
########################################################################################################################
# #
# Script to return a text file with a list of residue-by-residue contact, a file to visualise the residues in #
# PyMOL, and a file to visualise the residues in VMD. Use the same protein in 2 different states. #
# Ensure the protein has the same number of atoms and the that all each atom on one state maps to the same atom #
# on the other state. Ensure all files are in GROMACS format (.gro). #
# #
# Usage: #
# #
# python plumed.py --input1 /path/to/inactive/state.gro --input2 /path/to/active/state.gro --gpcr BW_numbers.txt #
# #
# The last file is optional and only works with GPCR structures. # #
# #
# Script developed by Jan Domanski and Naushad Velgy. Please send questions to [email protected] #
# #
########################################################################################################################
from MDAnalysis import Universe
from MDAnalysis.lib import distances as cdists
import numpy as np
import csv
import re
import os
import sys
#Map to nearest non-symmetrical atom (often C, also N and O and S)
selection = """
protein and not name H* and (
(resname ALA and (backbone or name CB))
or (resname ARG and (backbone or name CB or name CG or name NE))
or (resname ASN and (backbone or name CG))
or (resname ASP and (backbone or name CG))
or (resname CYS and (backbone or name SG))
or (resname GLN and (backbone or name CB or name CD))
or (resname GLU and (backbone or name CB or name CD))
or (resname GLY)
or (resname HIS and (backbone or name CE1 or name NE2))
or (resname ILE and (backbone or name CB))
or (resname LEU and (backbone or name CG))
or (resname LYS and (backbone or name CB or name CZ or name NZ))
or (resname MET and (backbone or name CB or name SD))
or (resname SER and (backbone or name OG))
or (resname PHE and (backbone or name CG or name CZ))
or (resname PRO and (backbone or name CD))
or (resname THR and (backbone or name CB))
or (resname TRP and (name CE3 or name NE1))
or (resname TYR and (backbone or name CG or name CZ))
or (resname VAL and (backbone or name CB))
)
"""
################################################################################
parser = ArgumentParser(description="Generates a text file with a list of unique residues, as well as files for "
"visualisation. Requires MDAnalysis and numpy.")
parser.add_argument('--input1', dest=struct1, default=None, help="Input structure 1, in GRO format.")
parser.add_argument('--input2', dest=struct2, default=None, help="Input structure 2, in GRO format.")
parser.add_argument('--scaling_factor', dest=SF, default=1.8, help="Scaling factor. Default is 1.8.")
parser.add_argument('--gpcr', dest=gpcr, default=None, help="Input text with Ballesteros-Weinstein numbers."
"See notes for more details."
"If your protein is not a GPCR, do not use this flag.")
parser.add_argument('--pymol', dest=pymol, action='store_true', help="Outputs a PML file to be used in PyMOL."
"See notes for more details.")
parser.add_argument('--vmd', dest=vmd, action='store_true', help="Outputs a TCL file to be used in VMD."
"See notes for more details.")
parser.add_argument('--help', dest=help, action='store_true', help="Print this page and exit.")
args = parser.parse_args()
################################################################################
if args.help == True:
print "Usage:"
print "/path/to/python /path/to/plumed_residues.py --input1 /path/to/state1.gro --input2 /path/to/state2.gro"
print "--input1 and --input2 are 2 receptor files in different conformations. \n" \
"Make sure atoms are in the same order."
print "--scaling_factor (optional) determines the scaling factor. The default is 1.8."
print "--gpcr (optional) signals script to output residue list with Ballesteros-Weinstein numbers. See notes."
print "--pymol (optional) signals script to output a PML file for visualisation of contacts in PyMOL. See notes."
print "--vmd (optional) signals script to output a TCL file for visualisation of contacts in VMD. See notes."
print "--help prints this page and exits."
sys.exit()
try:
inactive = Universe(args.struct1)
active = Universe(args.struct2)
#Sanity check 1
if inactive.filename.split('.')[1] != 'gro' and active.filename.split('.')[1] != 'gro':
print "This scripts works better if both inputs are in the GRO format."
print "This is because the nomenclature of GRO files is consistent, which is important."
print "Consider converting your files to GRO files using, for example,"
print "gmx editconf -f input.notgro -o input.gro"
sys.exit()
except IndexError:
print "Usage:"
print "python plumed.py --input1 /path/to/inactive/state.gro --input2 /path/to/active/state.gro"
sys.exit()
#Sanity check 2 (very important)
eq = True
with open('plumed_residues.err', 'w') as e:
for i, atoms in enumerate(zip(inactive.select_atoms('protein').atoms.names,
active.select_atoms('protein').atoms.names)):
if atoms[0] == atoms[1]:
continue
else:
e.write("Atom {} is different in the two structures.".format(i))
eq = False
if eq == False:
print "The atom names order of your two files are different."
print "Make sure they are in the same order (do not remove hydrogens!) and try again."
print "Check the plumed_residues.err file for more information."
sys.exit()
SCALING_FACTOR = args.SF
def generate_contacts(
universe,
selection,
upper_cutoff=6.0,
lower_cutoff=0.0,
resid_separation=3
):
gr = universe.select_atoms(selection)
distances = cdists.distance_array(gr.positions, gr.positions)
triu = np.triu(distances)
mask = np.argwhere((triu < upper_cutoff) & (triu > lower_cutoff))
contacts = [(atom1, atom2) for atom1, atom2 in mask \
if not abs(gr.atoms[atom1].resid - gr.atoms[atom2].resid) <= resid_separation]
return gr, triu, set([ frozenset(pair) for pair in contacts ])
def get_residues(gr, contacts):
return [ [gr[key[0]].resid, gr[key[1]].resid] for key in contacts.keys() ]
def get_BW_numbers(text):
BW_numbers = {re.compile("([a-zA-Z]+)([0-9]+)").match(line[2].strip()).group(2): line[1].strip() for line in
csv.reader(open(text, 'r'), delimiter='\t') }
return BW_numbers
atoms_active, triu_active, contacts_active = generate_contacts(active, selection)
atoms_inactive, triu_inactive, contacts_inactive = generate_contacts(inactive, selection)
unique_active_contacts = contacts_active - contacts_inactive
unique_inactive_contacts = contacts_inactive - contacts_active
contacts_active = {}
for idA, idB in unique_active_contacts:
da = triu_active[idA, idB] or triu_active[idB, idA]
di = triu_inactive[idA, idB] or triu_inactive[idB, idA]
if min(da, di) * SCALING_FACTOR > max(da, di): continue
#print("active", da, di)
contacts_active[(idA, idB)] = da
contacts_inactive = {}
for idA, idB in unique_inactive_contacts:
da = triu_active[idA, idB] or triu_active[idB, idA]
di = triu_inactive[idA, idB] or triu_inactive[idB, idA]
if min(da, di) * SCALING_FACTOR > max(da, di): continue
#print("inactive", da, di)
contacts_inactive[(idA, idB)] = di
all_res_inactive_list = get_residues(atoms_inactive, contacts_inactive)
inactive_res_list = []
a = [ inactive_res_list.append(elem) for elem in all_res_inactive_list if elem not in inactive_res_list ]
all_res_active_list = get_residues(atoms_active, contacts_active)
active_res_list = []
b = [ active_res_list.append(elem) for elem in all_res_active_list if elem not in active_res_list ]
if args.gpcr == True:
try:
BW_numbers = get_BW_numbers(args.gpcr)
with open('Unique_contact_residues.dat', 'w') as f:
writer = csv.writer(f)
f.write('Inactive\n')
f.write('Resid1 \t BW_number \t Resid2 \t BW_number \n')
for entry in inactive_res_list:
try:
f.write('%s \t %s \t %s \t %s \n' % (entry[0], BW_numbers[str(entry[0])], entry[1],
BW_numbers[str(entry[1])]))
except KeyError:
continue
f.write('Active\n')
f.write('Resid1 \t Resid2\n')
for entry in active_res_list:
try:
f.write('%s \t %s \t %s \t %s \n' % (entry[0], BW_numbers[str(entry[0])], entry[1],
BW_numbers[str(entry[1])]))
except KeyError:
continue
except IndexError:
print 'BW numbers file not given, or doesn\'t have the correct format.'
with open('Unique_contact_residues.dat', 'w') as f:
writer = csv.writer(f)
f.write('Inactive\n')
f.write('Resid1 \t Resid2\n')
for entry in inactive_res_list:
f.write('%s \t %s \n' % (entry[0], entry[1]))
f.write('Active\n')
f.write('Resid1 \t Resid2\n')
for entry in active_res_list:
f.write('%s \t %s \n' % (entry[0], entry[1]))
if args.pymol == True:
with open('PyMOL_representation.pml', 'w') as f:
for entry in get_residues(atoms_inactive, contacts_inactive):
f.write('show sticks, resid %s or resid %s and not name H*\n' % (entry[0], entry[1]))
for entry in get_residues(atoms_active, contacts_active):
f.write('show sticks, resid %s or resid %s and not name H*\n' % (entry[0], entry[1]))
f.write('hide (hydro)')
if args.vmd == True:
with open('VMD_representation.tcl', 'w') as f:
for entry in get_residues(atoms_inactive, contacts_inactive):
f.write('mol representation Licorice\n')
f.write('mol selection "noh(resid %s or resid %s)"\n' % (entry[0], entry[1]))
f.write('mol addrep top\n')
for entry in get_residues(atoms_active, contacts_active):
f.write('mol representation Licorice\n')
f.write('mol selection "noh(resid %s or resid %s)"\n' % (entry[0], entry[1]))
f.write('mol addrep top\n')