Skip to content

Commit 3372bb5

Browse files
add further directory parsing features to NebTaskDoc + tests
1 parent a1b4cba commit 3372bb5

File tree

4 files changed

+228
-18
lines changed

4 files changed

+228
-18
lines changed

emmet-core/emmet/core/neb.py

+193-17
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""Schemas and utils for NEB calculations."""
22

33
from datetime import datetime
4+
import numpy as np
45
from pathlib import Path
5-
from pydantic import BaseModel, Field
6-
from typing import Optional, Tuple, Union, Sequence
6+
from pydantic import BaseModel, Field, model_validator
7+
from scipy.interpolate import CubicSpline
8+
from typing import Optional, Tuple, Union, Sequence, Any
79
from typing_extensions import Self
810

911
from monty.os.path import zpath
@@ -41,14 +43,30 @@ class NebTaskDoc(BaseModel, extra="allow"):
4143
None,
4244
description="The initial and final configurations (reactants and products) of the barrier.",
4345
)
46+
endpoint_energies : Optional[Sequence[float]] = Field(
47+
None,
48+
description="Energies of the endpoint structures."
49+
)
50+
endpoint_calculations : Optional[list[Calculation]] = Field(
51+
None,
52+
description = "Calculation information for the endpoint structures"
53+
)
54+
endpoint_objects : Optional[list[dict]] = Field(
55+
None, description="VASP objects for each endpoint calculation."
56+
)
57+
endpoint_directories : Optional[list[str]] = Field(
58+
None, description="List of the directories for the endpoint calculations."
59+
)
4460

45-
image_calculations: Optional[list[Calculation]] = Field(
46-
None, description="Full calculation output for the NEB images."
61+
image_structures: list[Structure] = Field(
62+
None, description="Final structures for each NEB images."
4763
)
4864
image_energies: Optional[list[float]] = Field(
4965
None, description="Final energies for each image"
5066
)
51-
67+
image_calculations: Optional[list[Calculation]] = Field(
68+
None, description="Full calculation output for the NEB images."
69+
)
5270
dir_name: str = Field(None, description="Top-level NEB calculation directory.")
5371

5472
image_directories: list[str] = Field(
@@ -87,27 +105,93 @@ class NebTaskDoc(BaseModel, extra="allow"):
87105
None, description="Timestamp for when this task was completed"
88106
)
89107

108+
forward_barrier: Optional[float] = Field(
109+
None,
110+
description=(
111+
"Forward barrier for this reaction, "
112+
"i.e., the transition state energy minus "
113+
"the reactant / initial configuration energy."
114+
),
115+
)
116+
117+
reverse_barrier: Optional[float] = Field(
118+
None,
119+
description=(
120+
"Reverse barrier for this reaction, "
121+
"i.e., the transition state energy minus "
122+
"the product / final configuration energy."
123+
),
124+
)
125+
126+
barrier_analysis: Optional[dict[str, Any]] = Field(
127+
None, description="Analysis of the reaction barrier."
128+
)
129+
130+
@model_validator(mode="after")
131+
def set_barriers(self) -> Self:
132+
"""Perform analysis on barrier if needed."""
133+
if not self.forward_barrier or not self.reverse_barrier:
134+
self.barrier_analysis = neb_barrier_spline_fit(self.energies)
135+
for k in ("forward", "reverse"):
136+
setattr(self, f"{k}_barrier", self.barrier_analysis[f"{k}_barrier"])
137+
return self
138+
139+
@property
140+
def num_images(self) -> int:
141+
"""Return the number of VASP calculations / number of images performed."""
142+
return len(self.image_directories)
143+
144+
@property
145+
def energies(self) -> list[float]:
146+
"""Return the endpoint (optional) and image energies."""
147+
if self.endpoint_energies is not None:
148+
return [self.endpoint_energies[0], *self.image_energies, self.endpoint_energies[1]]
149+
return self.image_energies
150+
151+
@property
152+
def structures(self) -> list[Structure]:
153+
"""Return the endpoint and image structures."""
154+
return [self.endpoint_structures[0], *self.image_structures, self.endpoint_structures[1]]
155+
90156
@classmethod
91157
def from_directory(
92158
cls,
93159
dir_name: Union[Path, str],
94160
volumetric_files: Tuple[str, ...] = _VOLUMETRIC_FILES,
161+
store_calculations : bool = True,
95162
**neb_task_doc_kwargs,
96163
) -> Self:
164+
"""
165+
Return an NebTaskDoc from a single NEB calculation directory.
166+
167+
This method populates only the image energies and calculations fields,
168+
and the endpoint structures.
169+
"""
97170
if isinstance(dir_name, str):
98171
dir_name = Path(dir_name)
99172

100173
neb_directories = sorted(dir_name.glob("[0-9][0-9]"))
101174

102-
endpoint_directories = [neb_directories[0], neb_directories[-1]]
103-
endpoint_structures = [
104-
Structure.from_file(zpath(f"{endpoint_dir}/POSCAR"))
105-
for endpoint_dir in endpoint_directories
106-
]
175+
if (ep_calcs := neb_task_doc_kwargs.pop("endpoint_calculations", None) ) is None:
176+
endpoint_directories = [neb_directories[0], neb_directories[-1]]
177+
endpoint_structures = [
178+
Structure.from_file(zpath(f"{endpoint_dir}/POSCAR"))
179+
for endpoint_dir in endpoint_directories
180+
]
181+
endpoint_energies = None
182+
else:
183+
endpoint_directories = neb_task_doc_kwargs.pop("endpoint_directories")
184+
endpoint_structures = [
185+
ep_calc.output.structure for ep_calc in ep_calcs
186+
]
187+
endpoint_energies = [
188+
ep_calc.output.energy for ep_calc in ep_calcs
189+
]
107190

108191
image_directories = neb_directories[1:-1]
109192

110193
image_calculations = []
194+
image_structures = []
111195
image_objects = {}
112196
for iimage, image_dir in enumerate(image_directories):
113197
vasp_files = _find_vasp_files(image_dir, volumetric_files=volumetric_files)
@@ -125,12 +209,15 @@ def from_directory(
125209
},
126210
)
127211
image_calculations.append(calc)
212+
image_structures.append(calc.output.structure)
213+
214+
calcs_to_check = image_calculations + (ep_calcs or [])
128215

129216
task_state = (
130217
TaskState.SUCCESS
131218
if all(
132219
calc.has_vasp_completed == TaskState.SUCCESS
133-
for calc in image_calculations
220+
for calc in calcs_to_check
134221
)
135222
else TaskState.FAILED
136223
)
@@ -160,7 +247,11 @@ def from_directory(
160247

161248
return cls(
162249
endpoint_structures=endpoint_structures,
163-
image_calculations=image_calculations,
250+
endpoint_energies = endpoint_energies,
251+
endpoint_directories = [str(ep_dir) for ep_dir in endpoint_directories],
252+
endpoint_calculations = ep_calcs if store_calculations else None,
253+
image_calculations=image_calculations if store_calculations else None,
254+
image_structures = image_structures,
164255
dir_name=str(dir_name),
165256
image_directories=[str(img_dir) for img_dir in image_directories],
166257
orig_inputs=inputs["orig_inputs"],
@@ -170,11 +261,96 @@ def from_directory(
170261
state=task_state,
171262
image_energies=[calc.output.energy for calc in image_calculations],
172263
custodian=_parse_custodian(dir_name),
173-
completed_at=max(calc.completed_at for calc in image_calculations),
264+
completed_at=max(calc.completed_at for calc in calcs_to_check),
174265
**neb_task_doc_kwargs,
175266
)
176267

177-
@property
178-
def num_images(self) -> int:
179-
"""Return the number of VASP calculations / number of images performed."""
180-
return len(self.image_directories)
268+
@classmethod
269+
def from_directories(
270+
cls,
271+
endpoint_directories: list[str | Path],
272+
neb_directory: str | Path,
273+
volumetric_files: Tuple[str, ...] = _VOLUMETRIC_FILES,
274+
**neb_task_doc_kwargs
275+
) -> Self:
276+
"""
277+
Return an NebTaskDoc from endpoint and NEB calculation directories.
278+
279+
This method populates the endpoint and image fields completely,
280+
permitting an analysis of the barrier.
281+
"""
282+
endpoint_calculations = [None for _ in range(2)]
283+
endpoint_objects = [None for _ in range(2)]
284+
for idx, endpoint_dir in enumerate(endpoint_directories):
285+
vasp_files = _find_vasp_files(endpoint_dir, volumetric_files=volumetric_files)
286+
ep_key = "standard" if vasp_files.get("standard") else "relax" + str(max(
287+
int(k.split("relax")[-1]) for k in vasp_files if k.startswith("relax")
288+
))
289+
290+
endpoint_calculations[idx], endpoint_objects[idx] = Calculation.from_vasp_files(
291+
dir_name=endpoint_dir,
292+
task_name=f"NEB endpoint {idx + 1}",
293+
vasprun_file=vasp_files[ep_key]["vasprun_file"],
294+
outcar_file=vasp_files[ep_key]["outcar_file"],
295+
contcar_file=vasp_files[ep_key]["contcar_file"],
296+
volumetric_files=vasp_files[ep_key].get("volumetric_files", []),
297+
oszicar_file=vasp_files[ep_key].get("oszicar_file", None),
298+
vasprun_kwargs={
299+
"parse_potcar_file": False,
300+
},
301+
)
302+
303+
return cls.from_directory(
304+
neb_directory,
305+
volumetric_files=volumetric_files,
306+
endpoint_calculations = endpoint_calculations,
307+
endpoint_objects = endpoint_objects,
308+
endpoint_directories = endpoint_directories,
309+
**neb_task_doc_kwargs
310+
)
311+
312+
def neb_barrier_spline_fit(
313+
energies: Sequence[float],
314+
spline_kwargs: dict | None = None,
315+
frame_match_tol: float = 1.0e-6,
316+
) -> dict[str, Any]:
317+
"""
318+
Define basic NEB analysis tools.
319+
320+
Parameters
321+
----------
322+
energies : Sequence[float]
323+
The energies sorted by increasing frame index. Must include endpoints.
324+
frame_match_tol : float = 1.e-6
325+
The tolerance for matching the transition state frame index to the
326+
input frame indices.
327+
"""
328+
analysis: dict[str, Any] = {
329+
"energies": list(energies),
330+
"frame_index": list(frame_idx := np.linspace(0.0, 1.0, len(energies))),
331+
}
332+
energies = np.array(energies)
333+
334+
spline_kwargs = spline_kwargs or {"bc_type": "clamped"}
335+
spline_fit = CubicSpline(frame_idx, energies, **spline_kwargs)
336+
analysis["cubic_spline_pars"] = list(spline_fit.c)
337+
338+
crit_points = spline_fit.derivative().roots()
339+
analysis["ts_frame_index"] = -1
340+
analysis["ts_energy"] = -np.inf
341+
for crit_point in crit_points:
342+
if (energy := spline_fit(crit_point)) > analysis["ts_energy"] and spline_fit(
343+
crit_point, 2
344+
) <= 0.0:
345+
analysis["ts_frame_index"] = crit_point
346+
analysis["ts_energy"] = float(energy)
347+
348+
analysis["ts_in_frames"] = any(
349+
abs(analysis["ts_frame_index"] - frame_idx)
350+
< frame_match_tol * max(frame_idx, frame_match_tol)
351+
for frame_idx in frame_idx
352+
)
353+
analysis["forward_barrier"] = analysis["ts_energy"] - energies[0]
354+
analysis["reverse_barrier"] = analysis["ts_energy"] - energies[-1]
355+
356+
return analysis

emmet-core/tests/test_neb.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def test_neb_doc(test_dir, from_dir: bool):
2222
if from_dir:
2323
with TemporaryDirectory() as tmpdir:
2424
shutil.unpack_archive(test_dir / "neb_sample_calc.zip", tmpdir, "zip")
25-
neb_doc = NebTaskDoc.from_directory(tmpdir)
25+
neb_doc = NebTaskDoc.from_directory(Path(tmpdir) / "neb")
2626
neb_doc_dict = jsanitize(neb_doc)
2727
else:
2828
neb_doc_dict = loadfn(test_dir / "Si_neb_doc.json.bz2", cls=None)
@@ -35,6 +35,9 @@ def test_neb_doc(test_dir, from_dir: bool):
3535
neb_doc = NebTaskDoc(**neb_doc_dict)
3636

3737
assert neb_doc.num_images == 3
38+
assert len(neb_doc.image_structures) == neb_doc.num_images
39+
assert len(neb_doc.energies) == neb_doc.num_images
40+
assert len(neb_doc.structures) == neb_doc.num_images + 2 # always includes endpoints
3841
assert isinstance(neb_doc.orig_inputs, OrigInputs)
3942

4043
# test that NEB image calculations are all VASP Calculation objects
@@ -73,3 +76,34 @@ def test_neb_doc(test_dir, from_dir: bool):
7376
for idx, energy in enumerate(neb_doc.image_energies)
7477
)
7578
assert len(neb_doc.image_energies) == neb_doc.num_images
79+
80+
def test_from_directories(test_dir):
81+
82+
with TemporaryDirectory() as tmpdir:
83+
tmpdir = Path(tmpdir)
84+
shutil.unpack_archive(test_dir / "neb_sample_calc.zip", tmpdir, "zip")
85+
neb_doc = NebTaskDoc.from_directories(
86+
[tmpdir / f"relax_endpoint_{idx+1}" for idx in range(2)],
87+
tmpdir / "neb",
88+
)
89+
90+
assert all(isinstance(ep_calc,Calculation) for ep_calc in neb_doc.endpoint_calculations)
91+
92+
assert all(
93+
"relax_endpoint_" in ep_dir for ep_dir in neb_doc.endpoint_directories
94+
)
95+
96+
assert len(neb_doc.energies) == neb_doc.num_images + 2
97+
assert len(neb_doc.structures) == neb_doc.num_images + 2
98+
assert isinstance(neb_doc.barrier_analysis,dict)
99+
100+
assert all(
101+
neb_doc.barrier_analysis.get(k) is not None
102+
for k in ("energies","frame_index","cubic_spline_pars","ts_frame_index","ts_energy","ts_in_frames","forward_barrier","reverse_barrier")
103+
)
104+
105+
assert all(
106+
getattr(neb_doc,f"{direction}_barrier") == neb_doc.barrier_analysis[f"{direction}_barrier"]
107+
for direction in ("forward", "reverse")
108+
)
109+

test_files/Si_neb_doc.json.bz2

2.41 KB
Binary file not shown.

test_files/neb_sample_calc.zip

1.81 MB
Binary file not shown.

0 commit comments

Comments
 (0)