Skip to content

Commit 21941b2

Browse files
committed
new minor revision of quickMD-nf workflow
1 parent 0c59932 commit 21941b2

File tree

5 files changed

+314
-48
lines changed

5 files changed

+314
-48
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ logs/
66
.vscode
77
.nextflow*
88
results_2024-11-22/
9+
results_*/
910
results-4.0_2024-11-07/
1011
results-4.0_2024-11-08_75K/
1112
results-4.0_2024-11-08_old/

bin/openmmMD_dualmin.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,12 @@ def md_nvt(simulation, csvname: str, totalsteps: int, reprate: int, pdbname, int
167167
#output rmsf per residue per chain based on generated PDB trajectory
168168
def rmsf_analysis_by_chain(pdb_traj: str, rmsfcsv: str):
169169
u = mda.Universe(pdb_traj)
170-
ag = u.atoms
171-
new_dimensions = [117, 117, 117, 90, 90, 90]
172-
set_dim = transformations.boxdimensions.set_dimensions(new_dimensions)
173-
transform = transformations.unwrap(ag)
174-
center = transformations.center_in_box(ag.select_atoms('protein'), wrap=True)
175-
u.trajectory.add_transformations(set_dim, transform, center)
170+
#ag = u.atoms
171+
#new_dimensions = [117, 117, 117, 90, 90, 90]
172+
#set_dim = transformations.boxdimensions.set_dimensions(new_dimensions)
173+
#transform = transformations.unwrap(ag)
174+
#center = transformations.center_in_box(ag.select_atoms('protein'), wrap=True)
175+
#u.trajectory.add_transformations(set_dim, transform, center)
176176
chain_ids = np.unique(u.select_atoms('protein').atoms.chainIDs)
177177
for chain_id in chain_ids:
178178
chain = u.select_atoms(f'protein and chainID {chain_id}')

bin/openmmMD_dualmin_restraint.py

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
#!/usr/bin/env python3
2+
"""openmmMD_dualmin_restraint
3+
4+
Minimise the potential energy of the wildtype protein structure and mutated variants, according to AMBER14 potentials
5+
Usage:
6+
openmmMD_dualmin_restraint.py [--inpdb=<pdb>] [--pH=<pH>] [--steps=<steps>] [--report_every=<report_every>] [--init_steps=<init_steps>] [--relax_steps=<relax_steps>] [--relax_report_every=<relax_report_every>] [--no_restraints]
7+
8+
Options:
9+
--inpdb=<pdb> Input PDB file of protein as obtained from previous process
10+
--pH=<ph> Set pH of the protein
11+
--steps=<steps> Number of steps in final simulation proper after equilibriation and relax, used for RMSF calculation
12+
--report_every=<report_every> Report every # steps to final output PDB trajectory and energetic CSV file (CSV for whole reported trajectory)
13+
--init_steps=<init_steps> Initial steps of solvent equilibration (fixed protein backbone) before any reported trajectory
14+
--relax_steps=<relax_steps> Initial reported trajectory steps focused on protein structure relax without restraints
15+
--relax_report_every=<relax_report_every> Structure sampling rate of initial reported trajectory
16+
--no_restraints Allow movement of all atoms
17+
"""
18+
19+
import logging
20+
from docopt import docopt
21+
import pandas as pd
22+
import numpy as np
23+
import re
24+
import sys
25+
import pathlib
26+
import MDAnalysis as mda
27+
from MDAnalysis import transformations
28+
from MDAnalysis.analysis import rms, align
29+
from sys import stdout
30+
from pdbfixer import PDBFixer
31+
import openmm.app as app
32+
import openmm as mm
33+
from openmm.unit import *
34+
from openmm.app import element
35+
36+
#Process to clean the original PDB, find missing atoms, remove heterogens and assign a protonation state to mimic selected pH, and output fixed PDB file
37+
def clean_pdb(pdbname: str, pH: str):
38+
pH_fl = float(pH)
39+
pdb = PDBFixer(pdbname)
40+
pdb.missingResidues = {}
41+
pdb.removeHeterogens(False)
42+
pdb.findMissingAtoms()
43+
pdb.addMissingAtoms()
44+
pdb.addMissingHydrogens(pH_fl)
45+
stem = pdbname.replace(".pdb","")
46+
app.PDBFile.writeFile(pdb.topology, pdb.positions, open(stem + "_fixed.pdb", 'w'), keepIds=True)
47+
return pdb, stem
48+
49+
#Carries out minimisation on protein in vacuum to optimise not only covalent bonding structure but also important hydrogen bonds responsable for secondary structure. PDB file and final energy are outputted at the end. Similation parameters are self-contained in the process to avoid interference with later setup of solvation box.
50+
def vacuum_minimization(pdb, pdbout: str):
51+
pdb = pdb
52+
forcefield = app.ForceField('amber14-all.xml')
53+
system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff)
54+
integrator = mm.LangevinMiddleIntegrator(310*kelvin, 1/picosecond, 0.001*picoseconds)
55+
simulation = app.Simulation(pdb.topology, system, integrator)
56+
simulation.context.setPositions(pdb.positions)
57+
simulation.minimizeEnergy()
58+
final_state = simulation.context.getState(getEnergy=True, getPositions=True)
59+
logging.info("Final potential energy = %.9f kcal/mol"
60+
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
61+
final_pe = final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
62+
app.PDBFile.writeFile(simulation.topology, simulation.context.getState(getPositions=True).getPositions(), open(pdbout, "w"), keepIds=True)
63+
return pdbout, final_pe
64+
65+
#Similar to the above, carries out minimisation on protein in vacuum to optimise not only covalent bonding structure but also important hydrogen bonds responsable for secondary structure. However coordinates, instead of PDB file, along with final energy are outputted at the end, to allow sampling of minimised states and select the most optimised. Similation parameters are self-contained in the process to avoid interference with later setup of solvation box.
66+
def vacuum_minimization_sampling(pdb):
67+
pdb = pdb
68+
forcefield = app.ForceField('amber14-all.xml')
69+
system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff)
70+
integrator = mm.LangevinMiddleIntegrator(310*kelvin, 1/picosecond, 0.001*picoseconds)
71+
simulation = app.Simulation(pdb.topology, system, integrator)
72+
simulation.context.setPositions(pdb.positions)
73+
simulation.minimizeEnergy()
74+
final_state = simulation.context.getState(getEnergy=True, getPositions=True)
75+
logging.info("Final potential energy = %.9f kcal/mol"
76+
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
77+
final_pe = final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
78+
positions_final = final_state.getPositions()
79+
return final_pe, positions_final
80+
81+
#Set up forcefield with standard AMBER parameters for protein structure, and TIP3PFB
82+
def setup_forcefield():
83+
forcefield = app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
84+
return forcefield
85+
86+
#Set up input PDB topology and positions as modeller such that the simulation box can be modified (add solvent, virtual sites etc)
87+
def setup_modeller(pdb):
88+
#pdb = app.PDBFile(pdb)
89+
modeller = app.Modeller(pdb.topology, pdb.positions)
90+
return modeller
91+
92+
#Read PDB file
93+
def setup_pdbfile(pdbfile):
94+
pdb = app.PDBFile(pdbfile)
95+
return pdb
96+
97+
#instate resraints on backbone atoms via rendering backbone massless
98+
def apply_mass_restraints(system, topology):
99+
for atom in topology.atoms():
100+
if atom.name in ["CA", "C", "N"]:
101+
system.setParticleMass(atom.index, 0)
102+
103+
#disengage restraints on backbone atoms via reinstating default atomic mass
104+
def remove_mass_restraints(system, topology):
105+
for atom in topology.atoms():
106+
if atom.name in ["CA", "C", "N"]:
107+
element_mass = element.Element.getBySymbol(atom.element.symbol).mass
108+
system.setParticleMass(atom.index, element_mass)
109+
110+
111+
#integrate modeller and forcefied into a defined system. Before integration, solvent molecules are added to the box, with box vectors defined such that water density at a given pressure (normally standard pressure of 1atm) can be simulated. Padding is added, with excess molecules removed to achieve 1atm density
112+
def setup_system(modeller, forcefield, solvmol: str, padding: int):
113+
Natoms=int(solvmol)
114+
xvec = mm.Vec3(11.70, 0.0, 0.0)
115+
yvec = mm.Vec3(0.0, 11.70, 0.0)
116+
zvec = mm.Vec3(0.0, 0.0, 11.70)
117+
modeller.topology.setPeriodicBoxVectors((xvec,yvec,zvec))
118+
padding = padding * nanometer
119+
modeller.addSolvent(forcefield, padding=padding)
120+
water_molecules = []
121+
for res in modeller.topology.residues():
122+
if res.name == 'HOH':
123+
water_molecules.append(res)
124+
current_water_count = len(water_molecules)
125+
excess_water_count = current_water_count - Natoms
126+
if excess_water_count > 0:
127+
residues_to_remove = water_molecules[:excess_water_count]
128+
modeller.delete(residues_to_remove)
129+
final_water_count = sum(1 for res in modeller.topology.residues() if res.name == 'HOH')
130+
logging.info("Final water count = %d ", final_water_count)
131+
system = forcefield.createSystem(modeller.topology, nonbondedMethod=app.PME, nonbondedCutoff=1.0*nanometer, constraints=app.HBonds)
132+
#if not no_restraints:
133+
# logging.info("Using restraints on backbone")
134+
# for atom in modeller.topology.atoms():
135+
# if atom.name in ["CA","C","N"]:
136+
# system.setParticleMass(atom.index, 0)
137+
return system
138+
139+
#Define simulation parameter and simulation.context, combining modeller and system, and setting integrator
140+
def setup_simulation(modeller, system):
141+
integrator = mm.LangevinMiddleIntegrator(310*kelvin, 1/picosecond, 0.001*picoseconds)
142+
simulation = app.Simulation(modeller.topology, system, integrator)
143+
simulation.context.setPositions(modeller.positions)
144+
return simulation, integrator
145+
146+
#Combine setup_forcefield and setup_system functions, redefine box vectors ahead of local minimisation. Output final minimised topology and mimimisation, and return simulation after minimisation
147+
def energy_minimization(modeller, info_sheet, no_restraints: bool):
148+
forcefield = setup_forcefield()
149+
solvmol = 50000
150+
padding = 1 #nanometer
151+
system = setup_system(modeller, forcefield, solvmol, padding)
152+
xvec = mm.Vec3(11.70, 0.0, 0.0)
153+
yvec = mm.Vec3(0.0, 11.70, 0.0)
154+
zvec = mm.Vec3(0.0, 0.0, 11.70)
155+
if not no_restraints:
156+
apply_mass_restraints(system, modeller.topology)
157+
simulation = setup_simulation(modeller, system)[0]
158+
integrator = setup_simulation(modeller, system)[1]
159+
simulation.context.setPeriodicBoxVectors(xvec,yvec,zvec)
160+
init_state = simulation.context.getState(getEnergy=True, getPositions=True)
161+
logging.info("Starting potential energy = %.9f kcal/mol"
162+
% init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
163+
init_pe = init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
164+
mm.LocalEnergyMinimizer.minimize(simulation.context)
165+
final_state = simulation.context.getState(getEnergy=True, getPositions=True)
166+
logging.info("Final potential energy = %.9f kcal/mol"
167+
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
168+
final_pe = final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
169+
with open(info_sheet, 'w') as file:
170+
file.write(f'Initial potential energy: {init_pe}\n')
171+
file.write(f'Minimised potential energy: {final_pe}\n')
172+
return simulation.topology, final_state.getPositions(), final_pe, simulation, integrator, system
173+
174+
#Takes minimised solvation box to run grand canonical ensemble (NVT), with initial temperature increase in 100-step intervals. 0,5ns of equilibration is allowed to achieve equilibrium state. After which the requested number of steps is run, with reporting corresponding to the number of steps in the --report_every option
175+
def md_nvt(simulation, csvname: str, totalsteps: int, reprate: int, pdbname, integrator, system, initsteps: int, initpdb: str, relax_steps: int, relax_report_every: int, no_restraints: bool):
176+
init_state = simulation.context.getState(getEnergy=True, getPositions=True)
177+
init_pe = init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
178+
for temp in range(0, 310, 10):
179+
integrator.setTemperature(temp*kelvin)
180+
simulation.reporters.append(app.StateDataReporter(stdout, 1000, step=True, potentialEnergy=True, temperature=True, volume=True))
181+
simulation.step(10000)
182+
simulation.context.setVelocitiesToTemperature(310*kelvin)
183+
#simulation.step(496800)
184+
simulation.step(initsteps)
185+
if not no_restraints:
186+
remove_mass_restraints(system, simulation.topology)
187+
simulation.reporters.append(app.PDBReporter(initpdb, relax_report_every))
188+
simulation.reporters.append(app.StateDataReporter(stdout, reprate, step=True, potentialEnergy=True, temperature=True, volume=True))
189+
prepdf = {'Step':[], 'Potential Energy_kJ/mole':[], 'Temperature_K)':[], 'Box Volume_nm^3':[]}
190+
inidf = pd.DataFrame(prepdf)
191+
inidf.to_csv(csvname, index=False)
192+
simulation.reporters.append(app.StateDataReporter(csvname, reprate, step=True,
193+
potentialEnergy=True, temperature=True, volume=True, append=True))
194+
simulation.step(relax_steps)
195+
simulation.reporters.append(app.PDBReporter(pdbname, reprate))
196+
simulation.step(totalsteps)
197+
final_state = simulation.context.getState(getEnergy=True, getPositions=True)
198+
final_pe = final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
199+
df = pd.read_csv(csvname)
200+
av_energy = df.loc[:, 'Potential Energy_kJ/mole'].mean()
201+
return init_pe, final_pe, av_energy, simulation, simulation.topology, init_state.getPositions(), final_state.getPositions()
202+
203+
#output rmsf per residue per chain based on generated PDB trajectory
204+
def rmsf_analysis_by_chain(pdb_traj: str, rmsfcsv: str):
205+
u = mda.Universe(pdb_traj)
206+
#ag = u.atoms
207+
#new_dimensions = [117, 117, 117, 90, 90, 90]
208+
#set_dim = transformations.boxdimensions.set_dimensions(new_dimensions)
209+
#transform = transformations.unwrap(ag)
210+
#center = transformations.center_in_box(ag.select_atoms('protein'), wrap=True)
211+
#u.trajectory.add_transformations(set_dim, transform, center)
212+
chain_ids = np.unique(u.select_atoms('protein').atoms.chainIDs)
213+
for chain_id in chain_ids:
214+
chain = u.select_atoms(f'protein and chainID {chain_id}')
215+
align.AlignTraj(u, chain, select=f'protein and chainID {chain_id} and name CA', in_memory=True).run()
216+
c_alphas = chain.select_atoms('protein and name CA')
217+
R = rms.RMSF(c_alphas).run()
218+
residue_ids = [residue.resid for residue in c_alphas.residues]
219+
residue_names = [residue.resname for residue in c_alphas.residues]
220+
rmsf_values = R.rmsf
221+
df = pd.DataFrame({'Res_ID': residue_ids, 'Res_Name': residue_names, 'RMSF': rmsf_values })
222+
print(df)
223+
rmsfout = str(rmsfcsv + "_rmsf_ch" + chain_id + ".csv")
224+
df.to_csv(rmsfout, index=False)
225+
226+
def main():
227+
arguments = docopt(__doc__, version='openmmMD_dualmin_restraint.py')
228+
#fix PDB input file and impose protonation state to recreate chosen pH
229+
pdb_clean = clean_pdb(arguments['--inpdb'], arguments['--pH'])
230+
pdb = pdb_clean[0]
231+
stem = pdb_clean[1]
232+
pH_str = str(arguments['--pH'] + "pH")
233+
for j in range(1,2):
234+
strj = str(j)
235+
#sample 100 iterations of minimisation in a vacuum and select coordinates of lowest energy state
236+
pdbout = str(stem + "_" + pH_str + "_iter_" + strj + "vac_min.pdb")
237+
energies = []
238+
positions = []
239+
iterations = 100
240+
for step in range(iterations):
241+
state_out = vacuum_minimization_sampling(pdb)
242+
energy = state_out[0]
243+
positions_final = state_out[1]
244+
energies.append(energy)
245+
positions.append(positions_final)
246+
min_energy_index = np.argmin(energies)
247+
lowest_energy_positions = positions[min_energy_index]
248+
app.PDBFile.writeFile(pdb.topology, lowest_energy_positions, open(pdbout, 'w'), keepIds=True)
249+
pdb = app.PDBFile(pdbout)
250+
max_iterations = 100
251+
#set up simulation of protein solvated by water, conditional on minimisation creating net potential energy state (E_tot < 0) to avoid box explosion
252+
info_sheet = str(stem + pH_str + "iter" + strj + "_info.txt")
253+
modeller = setup_modeller(pdb)
254+
second_iteration = 0
255+
while second_iteration < max_iterations:
256+
min_pdb = energy_minimization(modeller, info_sheet, arguments['--no_restraints'])
257+
outputII = min_pdb[2]
258+
thresholdII = 0
259+
if outputII < thresholdII:
260+
break
261+
second_iteration += 1
262+
min_out = str(stem + pH_str + "iter" + strj + ".pdb")
263+
app.PDBFile.writeFile(min_pdb[0], min_pdb[1], open(min_out, "w"), keepIds=True)
264+
#set system, simulation and integrator parameters to run main trajectory, define output CSV and trajectory PDB names, set total steps, report rate and initial steps (for equilibration), set as inputs to NVT simulation function (md_nvt)
265+
simulation = min_pdb[3]
266+
integrator = min_pdb[4]
267+
system = min_pdb[5]
268+
initpdb = str(stem + pH_str + "_structure_sampling_" + strj + ".pdb")
269+
csvname = str(stem + pH_str + "_traj" + strj + ".csv")
270+
pdbname = str(stem + pH_str + "_traj" + strj + ".pdb")
271+
steps =int(arguments['--steps'])
272+
report_every = int(arguments['--report_every'])
273+
init_steps = int(arguments['--init_steps']) # total unreported step during solvent equilibration/protein restrained
274+
temp_steps = 310000 #total number of steps for heating from 0K to 310K
275+
equil_steps = init_steps - temp_steps
276+
relax_steps = int(arguments["--relax_steps"])
277+
relax_report_every = int(arguments['--relax_report_every'])
278+
sim_run = md_nvt(simulation, csvname, steps, report_every, pdbname, integrator, system, equil_steps, initpdb, relax_steps, relax_report_every, arguments['--no_restraints'])
279+
av_energy = sim_run[2]
280+
with open(info_sheet, 'a') as file:
281+
file.write(f'Average trajectory potential energy: {av_energy}\n')
282+
#Reference (final) and feedstock (initial) frames in case of future use
283+
sim_ref = str(stem + pH_str + "_reference" + strj + ".pdb")
284+
app.PDBFile.writeFile(sim_run[4], sim_run[5], open(sim_ref, "w"), keepIds=True)
285+
sim_fin = str(stem + pH_str + "_feedstock" + strj + ".pdb")
286+
app.PDBFile.writeFile(sim_run[4], sim_run[6], open(sim_fin, "w"), keepIds=True)
287+
#set rmsf output name and perform rmsf analysis on output trajectory
288+
rmsfstem = str(stem + pH_str + "_traj" + strj)
289+
rmsf_analysis_by_chain(pdbname, rmsfstem)
290+
291+
292+
if __name__ == '__main__':
293+
arguments = docopt(__doc__, version='openmmMD_dualmin_restraint.py')
294+
logging.getLogger().setLevel(logging.INFO)
295+
main()

0 commit comments

Comments
 (0)