Skip to content

Commit 3808f75

Browse files
committed
new testing patch of quickMD-nf workflow
1 parent 0798f04 commit 3808f75

File tree

1 file changed

+40
-34
lines changed

1 file changed

+40
-34
lines changed

bin/openmmMD_dualmin_restraint.py

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
Minimise the potential energy of the wildtype protein structure and mutated variants, according to AMBER14 potentials
55
Usage:
6-
openmmMD_dualmin_restraint.py [--inpdb=<pdb>] [--pH=<pH>] [--steps=<steps>] [--report_every=<report_every>] [--init_steps=<init_steps>] [--relax_steps=<relax_steps>] [--relax_report_every=<relax_report_every>] [--no_restraints]
6+
openmmMD_dualmin_restraint.py [--inpdb=<pdb>] [--pH=<pH>] [--steps=<steps>] [--report_every=<report_every>] [--init_steps=<init_steps>] [--relax_steps=<relax_steps>] [--relax_report_every=<relax_report_every>] [--no_restraints] [--test_conditions]
77
88
Options:
99
--inpdb=<pdb> Input PDB file of protein as obtained from previous process
@@ -14,6 +14,7 @@
1414
--relax_steps=<relax_steps> Initial reported trajectory steps focused on protein structure relax without restraints
1515
--relax_report_every=<relax_report_every> Structure sampling rate of initial reported trajectory
1616
--no_restraints Allow movement of all atoms
17+
--test_conditions Apply test conditions, reducing initial phase significantly
1718
"""
1819

1920
import logging
@@ -33,7 +34,7 @@
3334
from openmm.unit import *
3435
from openmm.app import element
3536

36-
#Process to clean the original PDB, find missing atoms, remove heterogens and assign a protonation state to mimic selected pH, and output fixed PDB file
37+
# process to clean the original PDB, find missing atoms, remove heterogens and assign a protonation state to mimic selected pH, and output fixed PDB file
3738
def clean_pdb(pdbname: str, pH: str):
3839
pH_fl = float(pH)
3940
pdb = PDBFixer(pdbname)
@@ -46,23 +47,23 @@ def clean_pdb(pdbname: str, pH: str):
4647
app.PDBFile.writeFile(pdb.topology, pdb.positions, open(stem + "_fixed.pdb", 'w'), keepIds=True)
4748
return pdb, stem
4849

49-
#Carries out minimisation on protein in vacuum to optimise not only covalent bonding structure but also important hydrogen bonds responsable for secondary structure. PDB file and final energy are outputted at the end. Similation parameters are self-contained in the process to avoid interference with later setup of solvation box.
50+
# carries out minimisation on protein in vacuum to optimise not only covalent bonding structure but also important hydrogen bonds responsable for secondary structure. PDB file and final energy are outputted at the end. Similation parameters are self-contained in the process to avoid interference with later setup of solvation box.
5051
def vacuum_minimization(pdb, pdbout: str):
5152
pdb = pdb
5253
forcefield = app.ForceField('amber14-all.xml')
5354
system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff)
5455
integrator = mm.LangevinMiddleIntegrator(310*kelvin, 1/picosecond, 0.001*picoseconds)
5556
simulation = app.Simulation(pdb.topology, system, integrator)
5657
simulation.context.setPositions(pdb.positions)
57-
simulation.minimizeEnergy()
58+
simulation.minimizeEnergy(tolerance=0.5)
5859
final_state = simulation.context.getState(getEnergy=True, getPositions=True)
5960
logging.info("Final potential energy = %.9f kcal/mol"
6061
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
6162
final_pe = final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
6263
app.PDBFile.writeFile(simulation.topology, simulation.context.getState(getPositions=True).getPositions(), open(pdbout, "w"), keepIds=True)
6364
return pdbout, final_pe
6465

65-
#Similar to the above, carries out minimisation on protein in vacuum to optimise not only covalent bonding structure but also important hydrogen bonds responsable for secondary structure. However coordinates, instead of PDB file, along with final energy are outputted at the end, to allow sampling of minimised states and select the most optimised. Similation parameters are self-contained in the process to avoid interference with later setup of solvation box.
66+
# similar to the above, carries out minimisation on protein in vacuum to optimise not only covalent bonding structure but also important hydrogen bonds responsable for secondary structure. However coordinates, instead of PDB file, along with final energy are outputted at the end, to allow sampling of minimised states and select the most optimised. Similation parameters are self-contained in the process to avoid interference with later setup of solvation box.
6667
def vacuum_minimization_sampling(pdb):
6768
pdb = pdb
6869
forcefield = app.ForceField('amber14-all.xml')
@@ -78,37 +79,37 @@ def vacuum_minimization_sampling(pdb):
7879
positions_final = final_state.getPositions()
7980
return final_pe, positions_final
8081

81-
#Set up forcefield with standard AMBER parameters for protein structure, and TIP3PFB
82+
# set up forcefield with standard AMBER parameters for protein structure, and TIP3PFB
8283
def setup_forcefield():
8384
forcefield = app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
8485
return forcefield
8586

86-
#Set up input PDB topology and positions as modeller such that the simulation box can be modified (add solvent, virtual sites etc)
87+
# set up input PDB topology and positions as modeller such that the simulation box can be modified (add solvent, virtual sites etc)
8788
def setup_modeller(pdb):
8889
#pdb = app.PDBFile(pdb)
8990
modeller = app.Modeller(pdb.topology, pdb.positions)
9091
return modeller
9192

92-
#Read PDB file
93+
# read PDB file
9394
def setup_pdbfile(pdbfile):
9495
pdb = app.PDBFile(pdbfile)
9596
return pdb
9697

97-
#instate resraints on backbone atoms via rendering backbone massless
98+
# instate resraints on backbone atoms via rendering backbone massless
9899
def apply_mass_restraints(system, topology):
99100
for atom in topology.atoms():
100101
if atom.name in ["CA", "C", "N"]:
101102
system.setParticleMass(atom.index, 0)
102103

103-
#disengage restraints on backbone atoms via reinstating default atomic mass
104+
# disengage restraints on backbone atoms via reinstating default atomic mass
104105
def remove_mass_restraints(system, topology):
105106
for atom in topology.atoms():
106107
if atom.name in ["CA", "C", "N"]:
107108
element_mass = element.Element.getBySymbol(atom.element.symbol).mass
108109
system.setParticleMass(atom.index, element_mass)
109110

110111

111-
#integrate modeller and forcefied into a defined system. Before integration, solvent molecules are added to the box, with box vectors defined such that water density at a given pressure (normally standard pressure of 1atm) can be simulated. Padding is added, with excess molecules removed to achieve 1atm density
112+
# integrate modeller and forcefied into a defined system. Before integration, solvent molecules are added to the box, with box vectors defined such that water density at a given pressure (normally standard pressure of 1atm) can be simulated. Padding is added, with excess molecules removed to achieve 1atm density
112113
def setup_system(modeller, forcefield, solvmol: str, padding: int):
113114
Natoms=int(solvmol)
114115
xvec = mm.Vec3(11.70, 0.0, 0.0)
@@ -126,24 +127,19 @@ def setup_system(modeller, forcefield, solvmol: str, padding: int):
126127
if excess_water_count > 0:
127128
residues_to_remove = water_molecules[:excess_water_count]
128129
modeller.delete(residues_to_remove)
129-
final_water_count = sum(1 for res in modeller.topology.residues() if res.name == 'HOH')
130+
final_water_count = sum([1 for res in modeller.topology.residues() if res.name == 'HOH'])
130131
logging.info("Final water count = %d ", final_water_count)
131132
system = forcefield.createSystem(modeller.topology, nonbondedMethod=app.PME, nonbondedCutoff=1.0*nanometer, constraints=app.HBonds)
132-
#if not no_restraints:
133-
# logging.info("Using restraints on backbone")
134-
# for atom in modeller.topology.atoms():
135-
# if atom.name in ["CA","C","N"]:
136-
# system.setParticleMass(atom.index, 0)
137133
return system
138134

139-
#Define simulation parameter and simulation.context, combining modeller and system, and setting integrator
135+
# define simulation parameter and simulation.context, combining modeller and system, and setting integrator
140136
def setup_simulation(modeller, system):
141137
integrator = mm.LangevinMiddleIntegrator(310*kelvin, 1/picosecond, 0.001*picoseconds)
142138
simulation = app.Simulation(modeller.topology, system, integrator)
143139
simulation.context.setPositions(modeller.positions)
144140
return simulation, integrator
145141

146-
#Combine setup_forcefield and setup_system functions, redefine box vectors ahead of local minimisation. Output final minimised topology and mimimisation, and return simulation after minimisation
142+
# combine setup_forcefield and setup_system functions, redefine box vectors ahead of local minimisation. Output final minimised topology and mimimisation, and return simulation after minimisation
147143
def energy_minimization(modeller, info_sheet, no_restraints: bool):
148144
forcefield = setup_forcefield()
149145
solvmol = 50000
@@ -166,19 +162,23 @@ def energy_minimization(modeller, info_sheet, no_restraints: bool):
166162
logging.info("Final potential energy = %.9f kcal/mol"
167163
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
168164
final_pe = final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
169-
with open(info_sheet, 'w') as file:
170-
file.write(f'Initial potential energy: {init_pe}\n')
171-
file.write(f'Minimised potential energy: {final_pe}\n')
165+
with open(info_sheet, 'a') as file:
166+
file.write(f'Initial potential energy of solution: {init_pe}\n')
167+
file.write(f'Minimised potential energy of solution: {final_pe}\n')
172168
return simulation.topology, final_state.getPositions(), final_pe, simulation, integrator, system
173169

174-
#Takes minimised solvation box to run grand canonical ensemble (NVT), with initial temperature increase in 100-step intervals. 0,5ns of equilibration is allowed to achieve equilibrium state. After which the requested number of steps is run, with reporting corresponding to the number of steps in the --report_every option
175-
def md_nvt(simulation, csvname: str, totalsteps: int, reprate: int, pdbname, integrator, system, initsteps: int, initpdb: str, relax_steps: int, relax_report_every: int, no_restraints: bool):
170+
# takes minimised solvation box to run grand canonical ensemble (NVT), with initial temperature increase in 100-step intervals. 0,5ns of equilibration is allowed to achieve equilibrium state. After which the requested number of steps is run, with reporting corresponding to the number of steps in the --report_every option
171+
def md_nvt(simulation, csvname: str, totalsteps: int, reprate: int, pdbname, integrator, system, initsteps: int, initpdb: str, relax_steps: int, relax_report_every: int, no_restraints: bool, test_conditions: bool):
176172
init_state = simulation.context.getState(getEnergy=True, getPositions=True)
177173
init_pe = init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
178174
for temp in range(0, 310, 10):
179175
integrator.setTemperature(temp*kelvin)
180-
simulation.reporters.append(app.StateDataReporter(stdout, 1000, step=True, potentialEnergy=True, temperature=True, volume=True))
181-
simulation.step(10000)
176+
if test_conditions:
177+
simulation.reporters.append(app.StateDataReporter(stdout, 100, step=True, potentialEnergy=True, temperature=True, volume=True))
178+
simulation.step(100)
179+
else:
180+
simulation.reporters.append(app.StateDataReporter(stdout, 100, step=True, potentialEnergy=True, temperature=True, volume=True))
181+
simulation.step(10000)
182182
simulation.context.setVelocitiesToTemperature(310*kelvin)
183183
#simulation.step(496800)
184184
simulation.step(initsteps)
@@ -200,7 +200,7 @@ def md_nvt(simulation, csvname: str, totalsteps: int, reprate: int, pdbname, int
200200
av_energy = df.loc[:, 'Potential Energy_kJ/mole'].mean()
201201
return init_pe, final_pe, av_energy, simulation, simulation.topology, init_state.getPositions(), final_state.getPositions()
202202

203-
#output rmsf per residue per chain based on generated PDB trajectory
203+
# output rmsf per residue per chain based on generated PDB trajectory
204204
def rmsf_analysis_by_chain(pdb_traj: str, rmsfcsv: str):
205205
u = mda.Universe(pdb_traj)
206206
#ag = u.atoms
@@ -245,11 +245,14 @@ def main():
245245
positions.append(positions_final)
246246
min_energy_index = np.argmin(energies)
247247
lowest_energy_positions = positions[min_energy_index]
248+
lowest_energy_value = energies[min_energy_index]
248249
app.PDBFile.writeFile(pdb.topology, lowest_energy_positions, open(pdbout, 'w'), keepIds=True)
249250
pdb = app.PDBFile(pdbout)
250-
max_iterations = 100
251-
#set up simulation of protein solvated by water, conditional on minimisation creating net potential energy state (E_tot < 0) to avoid box explosion
252251
info_sheet = str(stem + pH_str + "iter" + strj + "_info.txt")
252+
with open(info_sheet, 'w') as file:
253+
file.write(f'Minimum potential energy of protein-in-vacuo: {lowest_energy_value}\n')
254+
#set up simulation of protein solvated by water, conditional on minimisation creating net potential energy state (E_tot < 0) to avoid box explosion
255+
max_iterations = 100
253256
modeller = setup_modeller(pdb)
254257
second_iteration = 0
255258
while second_iteration < max_iterations:
@@ -261,7 +264,7 @@ def main():
261264
second_iteration += 1
262265
min_out = str(stem + pH_str + "iter" + strj + ".pdb")
263266
app.PDBFile.writeFile(min_pdb[0], min_pdb[1], open(min_out, "w"), keepIds=True)
264-
#set system, simulation and integrator parameters to run main trajectory, define output CSV and trajectory PDB names, set total steps, report rate and initial steps (for equilibration), set as inputs to NVT simulation function (md_nvt)
267+
# set system, simulation and integrator parameters to run main trajectory, define output CSV and trajectory PDB names, set total steps, report rate and initial steps (for equilibration), set as inputs to NVT simulation function (md_nvt)
265268
simulation = min_pdb[3]
266269
integrator = min_pdb[4]
267270
system = min_pdb[5]
@@ -271,20 +274,23 @@ def main():
271274
steps =int(arguments['--steps'])
272275
report_every = int(arguments['--report_every'])
273276
init_steps = int(arguments['--init_steps']) # total unreported step during solvent equilibration/protein restrained
274-
temp_steps = 310000 #total number of steps for heating from 0K to 310K
277+
if arguments['--test_conditions']:
278+
temp_steps = 3100 # total number of steps for heating from 0K to 310K
279+
else:
280+
temp_steps = 310000 # total number of steps for heating from 0K to 310K
275281
equil_steps = init_steps - temp_steps
276282
relax_steps = int(arguments["--relax_steps"])
277283
relax_report_every = int(arguments['--relax_report_every'])
278-
sim_run = md_nvt(simulation, csvname, steps, report_every, pdbname, integrator, system, equil_steps, initpdb, relax_steps, relax_report_every, arguments['--no_restraints'])
284+
sim_run = md_nvt(simulation, csvname, steps, report_every, pdbname, integrator, system, equil_steps, initpdb, relax_steps, relax_report_every, arguments['--no_restraints'], arguments['--test_conditions'])
279285
av_energy = sim_run[2]
280286
with open(info_sheet, 'a') as file:
281287
file.write(f'Average trajectory potential energy: {av_energy}\n')
282-
#Reference (final) and feedstock (initial) frames in case of future use
288+
# reference (final) and feedstock (initial) frames in case of future use
283289
sim_ref = str(stem + pH_str + "_reference" + strj + ".pdb")
284290
app.PDBFile.writeFile(sim_run[4], sim_run[5], open(sim_ref, "w"), keepIds=True)
285291
sim_fin = str(stem + pH_str + "_feedstock" + strj + ".pdb")
286292
app.PDBFile.writeFile(sim_run[4], sim_run[6], open(sim_fin, "w"), keepIds=True)
287-
#set rmsf output name and perform rmsf analysis on output trajectory
293+
# set rmsf output name and perform rmsf analysis on output trajectory
288294
rmsfstem = str(stem + pH_str + "_traj" + strj)
289295
rmsf_analysis_by_chain(pdbname, rmsfstem)
290296

0 commit comments

Comments
 (0)