Skip to content

Commit 66d14f6

Browse files
ABFE protocol (#1045)
Add an Absolute Binding Free Energy Protocol to openfe. Key features & limitations: * Automatic Boresch restraint assignment. * Does not handle net charge corrections yet. --------- Co-authored-by: Irfan Alibay <[email protected]> Co-authored-by: hannahbaumann <[email protected]> Co-authored-by: Hannah Baumann <[email protected]>
1 parent b7a49df commit 66d14f6

36 files changed

+3956
-188
lines changed

devtools/data/gen_serialized_results.py

Lines changed: 106 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Dev script to generate some result jsons that are used for testing
33
44
Generates
5+
- ABFEProtocol_json_results.gz
6+
- used in abfe_results_json fixture
57
- SepTopProtocol_json_results.gy
68
- used in septop_json fixture
79
- AHFEProtocol_json_results.gz
@@ -15,8 +17,8 @@
1517
import json
1618
import logging
1719
import pathlib
18-
import tempfile
1920
from rdkit import Chem
21+
import tempfile
2022
from openff.toolkit import (
2123
Molecule, RDKitToolkitWrapper, AmberToolsToolkitWrapper
2224
)
@@ -30,10 +32,25 @@
3032
from gufe.tokenization import JSON_HANDLER
3133
import openfe
3234
from openfe.protocols.openmm_md.plain_md_methods import PlainMDProtocol
33-
from openfe.protocols.openmm_afe import AbsoluteSolvationProtocol
35+
from openfe.protocols.openmm_afe import (
36+
AbsoluteSolvationProtocol,
37+
AbsoluteBindingProtocol,
38+
)
3439
from openfe.protocols.openmm_rfe import RelativeHybridTopologyProtocol
3540
from openfe.protocols.openmm_septop import SepTopProtocol
3641

42+
import sys
43+
from openfecli.utils import configure_logger
44+
45+
# avoid problems with output not showing if queueing system kills a job
46+
sys.stdout.reconfigure(line_buffering=True)
47+
48+
stdout_handler = logging.StreamHandler(sys.stdout)
49+
configure_logger('gufekey', handler=stdout_handler)
50+
configure_logger('gufe', handler=stdout_handler)
51+
configure_logger('openfe', handler=stdout_handler)
52+
configure_logger('openmmtools.multistate.multistatereporter', level=logging.DEBUG, handler=stdout_handler)
53+
configure_logger('openmmtools.multistate.multistatesampler', level=logging.DEBUG, handler=stdout_handler)
3754

3855
logger = logging.getLogger(__name__)
3956

@@ -64,7 +81,27 @@ def get_hif2a_inputs():
6481
return smcs, protcomp
6582

6683

67-
def execute_and_serialize(dag, protocol, simname):
84+
def execute_and_serialize(
85+
dag,
86+
protocol,
87+
simname,
88+
new_serialization: bool = False
89+
):
90+
"""
91+
Execute & serialize a DAG
92+
93+
Parameters
94+
----------
95+
dag : gufe.ProtocolDAG
96+
The DAG to execute & serialize.
97+
protocol : gufe.Protocol
98+
The Protocol to which the DAG belongs.
99+
simname : str
100+
The name of the simulation, used for the serialized file name.
101+
new_serialization : bool
102+
Whether or not we should use the "new" `to_json` serialization.
103+
Default is False (for now).
104+
"""
68105
logger.info(f"running {simname}")
69106
with tempfile.TemporaryDirectory() as tmpdir:
70107
workdir = pathlib.Path(tmpdir)
@@ -73,22 +110,27 @@ def execute_and_serialize(dag, protocol, simname):
73110
shared_basedir=workdir,
74111
scratch_basedir=workdir,
75112
keep_shared=True,
76-
n_retries=3
113+
raise_error=True,
114+
n_retries=2,
77115
)
78116
protres = protocol.gather([dagres])
79117

80-
outdict = {
81-
"estimate": protres.get_estimate(),
82-
"uncertainty": protres.get_uncertainty(),
83-
"protocol_result": protres.to_dict(),
84-
"unit_results": {
85-
unit.key: unit.to_keyed_dict()
86-
for unit in dagres.protocol_unit_results
118+
if new_serialization:
119+
protres.to_json(f"{simname}_json_results.json")
120+
121+
else:
122+
outdict = {
123+
"estimate": protres.get_estimate(),
124+
"uncertainty": protres.get_uncertainty(),
125+
"protocol_result": protres.to_dict(),
126+
"unit_results": {
127+
unit.key: unit.to_keyed_dict()
128+
for unit in dagres.protocol_unit_results
129+
}
87130
}
88-
}
89131

90-
with gzip.open(f"{simname}_json_results.gz", 'wt') as zipfile:
91-
json.dump(outdict, zipfile, cls=JSON_HANDLER.encoder)
132+
with gzip.open(f"{simname}_json_results.gz", 'wt') as zipfile:
133+
json.dump(outdict, zipfile, cls=JSON_HANDLER.encoder)
92134

93135

94136
def generate_md_settings():
@@ -109,6 +151,52 @@ def generate_md_json(smc):
109151
execute_and_serialize(dag, protocol, "MDProtocol")
110152

111153

154+
def generate_abfe_settings():
155+
settings = AbsoluteBindingProtocol.default_settings()
156+
settings.solvent_equil_simulation_settings.equilibration_length_nvt = 10 * unit.picosecond
157+
settings.solvent_equil_simulation_settings.equilibration_length = 10 * unit.picosecond
158+
settings.solvent_equil_simulation_settings.production_length = 10 * unit.picosecond
159+
settings.solvent_simulation_settings.equilibration_length = 100 * unit.picosecond
160+
settings.solvent_simulation_settings.production_length = 500 * unit.picosecond
161+
settings.solvent_simulation_settings.time_per_iteration = 2.5 * unit.ps
162+
settings.complex_equil_simulation_settings.equilibration_length_nvt = 10 * unit.picosecond
163+
settings.complex_equil_simulation_settings.equilibration_length = 10 * unit.picosecond
164+
settings.complex_equil_simulation_settings.production_length = 100 * unit.picosecond
165+
settings.complex_simulation_settings.equilibration_length = 100 * unit.picosecond
166+
settings.complex_simulation_settings.production_length = 500 * unit.picosecond
167+
settings.complex_simulation_settings.time_per_iteration = 2.5 * unit.ps
168+
settings.solvent_solvation_settings.box_shape = 'dodecahedron'
169+
settings.complex_solvation_settings.box_shape = 'dodecahedron'
170+
settings.solvent_solvation_settings.solvent_padding = 1.5 * unit.nanometer
171+
settings.complex_solvation_settings.solvent_padding = 1.0 * unit.nanometer
172+
settings.forcefield_settings.nonbonded_cutoff = 0.8 * unit.nanometer
173+
settings.protocol_repeats = 3
174+
settings.engine_settings.compute_platform = 'CUDA'
175+
176+
return settings
177+
178+
179+
def generate_abfe_json():
180+
ligands, protein = get_hif2a_inputs()
181+
protocol = AbsoluteBindingProtocol(settings=generate_abfe_settings())
182+
sysA = openfe.ChemicalSystem(
183+
{
184+
"ligand": ligands[0],
185+
"protein": protein,
186+
"solvent": openfe.SolventComponent(),
187+
}
188+
)
189+
sysB = openfe.ChemicalSystem(
190+
{
191+
"protein": protein,
192+
"solvent": openfe.SolventComponent(),
193+
}
194+
)
195+
196+
dag = protocol.create(stateA=sysA, stateB=sysB, mapping=None)
197+
execute_and_serialize(dag, protocol, "ABFEProtocol", new_serialization=True)
198+
199+
112200
def generate_ahfe_settings():
113201
settings = AbsoluteSolvationProtocol.default_settings()
114202
settings.solvent_equil_simulation_settings.equilibration_length_nvt = 10 * unit.picosecond
@@ -136,7 +224,7 @@ def generate_ahfe_settings():
136224

137225
return settings
138226

139-
227+
140228
def generate_ahfe_json(smc):
141229
protocol = AbsoluteSolvationProtocol(settings=generate_ahfe_settings())
142230
sysA = openfe.ChemicalSystem(
@@ -156,7 +244,7 @@ def generate_rfe_settings():
156244
settings.simulation_settings.equilibration_length = 10 * unit.picosecond
157245
settings.simulation_settings.production_length = 250 * unit.picosecond
158246
settings.forcefield_settings.nonbonded_method = "nocutoff"
159-
247+
160248
return settings
161249

162250

@@ -222,12 +310,13 @@ def generate_septop_json():
222310

223311
dag = protocol.create(stateA=sysA, stateB=sysB, mapping=None)
224312
execute_and_serialize(dag, protocol, "SepTopProtocol")
225-
313+
226314

227315
if __name__ == "__main__":
228316
molA = get_molecule(LIGA, "ligandA")
229317
molB = get_molecule(LIGB, "ligandB")
230318
generate_md_json(molA)
319+
generate_abfe_json()
231320
generate_ahfe_json(molA)
232321
generate_rfe_json(molA, molB)
233322
generate_septop_json()
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
Absolute Binding Protocol
2+
=========================
3+
4+
Overview
5+
--------
6+
7+
The :class:`AbsoluteBindingProtocol <.AbsoluteBindingProtocol>` calculates the absolute binding free energy,
8+
which is the free energy difference between a ligand in solution and the ligand bound to a protein.
9+
10+
The absolute binding free energy is calculated through a thermodynamic cycle.
11+
In this cycle, the interactions of the molecule are decoupled, meaning turned off,
12+
using a partial annihilation scheme (see below) both in the solvent and in the complex phases.
13+
14+
Restraints are required to keep the weakly
15+
coupled and fully decoupled ligand in the binding site region and thereby reduce the phase
16+
space that needs to be sampled. In the :class:`AbsoluteBindingProtocol <.AbsoluteBindingProtocol>`
17+
we apply orientational, or Boresch-style, restraints, as described below.
18+
19+
The absolute binding free energy is then obtained via summation of free energy differences along the thermodynamic cycle.
20+
21+
.. figure:: img/abfe-cycle.png
22+
:scale: 50%
23+
24+
Thermodynamic cycle for the absolute binding free energy protocol.
25+
26+
Scientific Details
27+
------------------
28+
29+
Orientational restraints
30+
~~~~~~~~~~~~~~~~~~~~~~~~
31+
32+
Orientational, or Boresch-style, restraints are automaticallly (unless manually specified) applied between three
33+
protein and three ligand atoms using one bond, two angle, and three dihedral restraints.
34+
Reference atoms are picked based on different criteria, such as the root mean squared
35+
fluctuation of the atoms in a short MD simulation, the secondary structure of the protein,
36+
and the distance between atoms, based on heuristics from Baumann et al. [1]_ and Alibay et al. [2]_.
37+
Two strategies for selecting protein atoms are available, either picking atoms that are bonded to each other or that can span multiple residues.
38+
This can be specified using the ``restraint_settings.anchor_finding_strategy`` settings.
39+
40+
Partial annihilation scheme
41+
~~~~~~~~~~~~~~~~~~~~~~~~~~~
42+
43+
In the :class:`.AbsoluteBindingProtocol` the coulombic interactions of the molecule are fully turned off (annihilated).
44+
The Lennard-Jones interactions are instead decoupled, meaning the intermolecular interactions are turned off, keeping the intramolecular Lennard-Jones interactions.
45+
46+
The lambda schedule
47+
~~~~~~~~~~~~~~~~~~~
48+
49+
Molecular interactions are turned off during an alchemical path using a discrete set of lambda windows.
50+
For the transformation in the binding site, the following steps are carried out, starting with the ligand fully interacting in the binding site.
51+
52+
1. Restrain the ligand using orientational restraints.
53+
2. Turn off the electrostatic interactions of the ligand.
54+
3. Decouple Lennard-Jones interactions of the ligand.
55+
4. Release the restraints of the now dummy ligand analytically.
56+
57+
The lambda schedule in the solvent phase is similar to the one in the complex, except that no restraints are applied.
58+
A soft-core potential is applied to the Lennard-Jones potential to avoid instablilites in intermediate lambda windows.
59+
The soft-core potential function from Beutler et al. [3]_ is used by default.
60+
The lambda schedule is defined in the ``lambda_settings`` objects ``lambda_elec``, ``lambda_vdw``, and ``lambda_restraints``.
61+
62+
Simulation overview
63+
~~~~~~~~~~~~~~~~~~~
64+
65+
The :class:`.ProtocolDAG` of the :class:`.AbsoluteBindingProtocol` contains :class:`.ProtocolUnit`\ s from both the complex and solvent transformations.
66+
This means that both legs of the thermodynamic cycle are constructed and run concurrently in the same :class:`.ProtocolDAG`.
67+
This is different from the :class:`.RelativeHybridTopologyProtocol` where the :class:`.ProtocolDAG` only runs a single leg of a thermodynamic cycle.
68+
If multiple ``protocol_repeats`` are run (default: ``protocol_repeats=3``), the :class:`.ProtocolDAG` contains multiple :class:`.ProtocolUnit`\ s of both complex and solvent transformations.
69+
70+
Simulation steps
71+
""""""""""""""""
72+
73+
Each :class:`.ProtocolUnit` (whether complex or solvent) carries out the following steps:
74+
75+
1. Parameterize the system using `OpenMMForceFields <https://github.com/openmm/openmmforcefields>`_ and `Open Force Field <https://github.com/openforcefield/openff-forcefields>`_.
76+
2. Equilibrate the fully interacting system using a short MD simulation using the same approach as the :class:`.PlainMDProtocol` (including rounds of NVT and NPT equilibration).
77+
3. Create an alchemical system.
78+
4. Add orientational restraints to the complex system.
79+
5. Minimize the alchemical system.
80+
6. Equilibrate and production simulate the alchemical system using the chosen multistate sampling method (under NPT conditions).
81+
7. Analyze results for the transformation.
82+
83+
.. note:: Three different types of multistate sampling (i.e. replica swapping between lambda states) methods can be chosen; HREX, SAMS, and independent (no lambda swaps attempted).
84+
By default the HREX approach is selected, this can be altered using ``solvent_simulation_settings.sampler_method`` or ``complex_simulation_settings.sampler_method`` (default: ``repex``).
85+
86+
Simulation details
87+
""""""""""""""""""
88+
89+
Here are some details of how the simulation is carried out which are not detailed in the :class:`.AbsoluteBindingSettings`:
90+
91+
* The protocol applies a `LangevinMiddleIntegrator <https://openmmtools.readthedocs.io/en/latest/api/generated/openmmtools.mcmc.LangevinDynamicsMove.html>`_ which uses Langevin dynamics, with the LFMiddle discretization [4]_.
92+
* A MonteCarloBarostat is used in the NPT ensemble to maintain constant pressure.
93+
94+
Getting the free energy estimate
95+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
96+
97+
The free energy differences are obtained from simulation data using the `MBAR estimator <https://www.alchemistry.org/wiki/Multistate_Bennett_Acceptance_Ratio>`_ (multistate Bennett acceptance ratio estimator) as implemented in the `PyMBAR package <https://pymbar.readthedocs.io/en/master/mbar.html>`_.
98+
Both the MBAR estimates of the two legs of the thermodynamic cycle, and the overall absolute binding free energy (of the entire cycle) are obtained,
99+
which is different compared to the results in the :class:`.RelativeHybridTopologyProtocol` where results from two legs of the thermodynamic cycle are obtained separately.
100+
101+
In addition to the estimates of the free energy changes and their uncertainty, the protocol also returns some metrics to help assess convergence of the results, these are detailed in the :ref:`multistate analysis section <multistate_analysis>`.
102+
103+
See Also
104+
--------
105+
106+
**Setting up AFE calculations**
107+
108+
* :ref:`Defining the Protocol <defining-protocols>`
109+
110+
111+
**Tutorials**
112+
113+
* :any:`Absolute Binding Free Energies tutorial <../../tutorials/abfe_tutorial>`
114+
115+
**Cookbooks**
116+
117+
:ref:`Cookbooks <cookbooks>`
118+
119+
**API Documentation**
120+
121+
* :ref:`OpenMM Absolute Binding Free Energy <afe binding protocol api>`
122+
* :ref:`OpenMM Protocol Settings <openmm protocol settings api>`
123+
124+
References
125+
----------
126+
127+
* `pymbar <https://pymbar.readthedocs.io/en/stable/>`_
128+
* `yank <http://getyank.org/latest/>`_
129+
* `OpenMMTools <https://openmmtools.readthedocs.io/en/stable/>`_
130+
* `OpenMM <https://openmm.org/>`_
131+
132+
.. [1] Broadening the Scope of Binding Free Energy Calculations Using a Separated Topologies Approach, H. Baumann, E. Dybeck, C. McClendon, F. Pickard IV, V. Gapsys, L. Pérez-Benito, D. Hahn, G. Tresadern, A. Mathiowetz, D. Mobley, J. Chem. Theory Comput., 2023, 19, 15, 5058–5076
133+
.. [2] Evaluating the use of absolute binding free energy in the fragment optimisation process, I. Alibay, A. Magarkar, D. Seeliger, P. Biggin, Commun Chem 5, 105 (2022)
134+
.. [3] Avoiding singularities and numerical instabilities in free energy calculations based on molecular simulations, T.C. Beutler, A.E. Mark, R.C. van Schaik, P.R. Greber, and W.F. van Gunsteren, Chem. Phys. Lett., 222 529–539 (1994)
135+
.. [4] Unified Efficient Thermostat Scheme for the Canonical Ensemble with Holonomic or Isokinetic Constraints via Molecular Dynamics, Zhijun Zhang, Xinzijian Liu, Kangyu Yan, Mark E. Tuckerman, and Jian Liu, J. Phys. Chem. A 2019, 123, 28, 6056-6079
151 KB
Loading

docs/guide/protocols/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Details on the theory and behaviour of different Protocols are listed here.
77

88
.. toctree::
99
relativehybridtopology
10+
absolutebinding
1011
absolutesolvation
1112
septop
1213
plainmd

docs/reference/api/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ OpenFE API Reference
1717
defining_and_executing_simulations
1818
openmm_rfe
1919
openmm_solvation_afe
20+
openmm_binding_afe
2021
openmm_septop
2122
openmm_md
2223
openmm_protocol_settings
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
OpenMM Absolute Binding Free Energy Protocol
2+
============================================
3+
4+
.. _afe binding protocol api:
5+
6+
This section provides details about the OpenMM Absolute Binding Free Energy Protocol
7+
implemented in OpenFE.
8+
9+
Protocol API specification
10+
--------------------------
11+
12+
.. module:: openfe.protocols.openmm_afe.equil_binding_afe_method
13+
14+
.. autosummary::
15+
:nosignatures:
16+
:toctree: generated/
17+
18+
AbsoluteBindingProtocol
19+
AbsoluteBindingComplexUnit
20+
AbsoluteBindingSolventUnit
21+
AbsoluteBindingProtocolResult
22+
23+
Protocol Settings
24+
-----------------
25+
26+
27+
Below are the settings which can be tweaked in the protocol. The default settings (accessed using :meth:`AbsoluteBindingProtocol.default_settings`) will automatically populate settings which we have found to be useful for running binding free energy calculations. There will however be some cases (such as when calculating difficult to converge systems) where you will need to tweak some of the following settings.
28+
29+
30+
.. module:: openfe.protocols.openmm_afe.equil_afe_settings
31+
32+
.. autopydantic_model:: AbsoluteBindingSettings
33+
:model-show-json: False
34+
:model-show-field-summary: False
35+
:model-show-config-member: False
36+
:model-show-config-summary: False
37+
:model-show-validator-members: False
38+
:model-show-validator-summary: False
39+
:field-list-validators: False
40+
:inherited-members: SettingsBaseModel
41+
:exclude-members: get_defaults
42+
:member-order: bysource

0 commit comments

Comments
 (0)