Skip to content

Commit 12d416e

Browse files
Add X23 benchmark (ddmms#118)
1 parent 73ad419 commit 12d416e

File tree

6 files changed

+380
-0
lines changed

6 files changed

+380
-0
lines changed

docs/source/user_guide/benchmarks/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ Benchmarks
99
nebs
1010
supramolecular
1111
physicality
12+
molecular_crystal
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
==================
2+
Molecular Crystals
3+
==================
4+
5+
X23
6+
===
7+
8+
Summary
9+
-------
10+
11+
Performance in predicting lattice energies for 23 molecular crystals.
12+
13+
14+
Metrics
15+
-------
16+
17+
1. Lattice energy error
18+
19+
How accurate lattice energy predictions are.
20+
21+
For each molecular crystal, lattice energy is calculated by taking the difference
22+
between the energy of the solid molecular crystal divided by the number of molecules it
23+
comprises, and the energy of the isolated molecule. This is compared to the reference
24+
lattice energy.
25+
26+
27+
Computational cost
28+
------------------
29+
30+
Low: tests are likely to take less than a minute to run on CPU.
31+
32+
33+
Data availability
34+
-----------------
35+
36+
Input structures:
37+
38+
* A. M. Reilly and A. Tkatchenko, Understanding the role of vibrations, exact exchange,
39+
and many-body van der waals interactions in the cohesive properties of molecular
40+
crystals, The Journal of chemical physics 139 (2013).
41+
42+
Reference data:
43+
44+
* Same as input data
45+
* PBE-D3(BJ)
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""Analyse X23 benchmark."""
2+
3+
from __future__ import annotations
4+
5+
from ase import units
6+
from ase.io import read, write
7+
import pytest
8+
9+
from ml_peg.analysis.utils.decorators import build_table, plot_parity
10+
from ml_peg.analysis.utils.utils import mae
11+
from ml_peg.app import APP_ROOT
12+
from ml_peg.calcs import CALCS_ROOT
13+
from ml_peg.models.get_models import get_model_names
14+
from ml_peg.models.models import current_models
15+
16+
MODELS = get_model_names(current_models)
17+
CALC_PATH = CALCS_ROOT / "molecular_crystal" / "X23" / "outputs"
18+
OUT_PATH = APP_ROOT / "data" / "molecular_crystal" / "X23"
19+
20+
# Unit conversion
21+
EV_TO_KJ_PER_MOL = units.mol / units.kJ
22+
23+
DEFAULT_THRESHOLDS = {"MAE": (0.0, 100.0)}
24+
25+
26+
def get_system_names() -> list[str]:
27+
"""
28+
Get list of X23 system names.
29+
30+
Returns
31+
-------
32+
list[str]
33+
List of system names from structure files.
34+
"""
35+
system_names = []
36+
for model_name in MODELS:
37+
model_dir = CALC_PATH / model_name
38+
if model_dir.exists():
39+
xyz_files = sorted(model_dir.glob("*.xyz"))
40+
if xyz_files:
41+
for xyz_file in xyz_files:
42+
atoms = read(xyz_file)
43+
system_names.append(atoms.info["system"])
44+
break
45+
return system_names
46+
47+
48+
@pytest.fixture
49+
@plot_parity(
50+
filename=OUT_PATH / "figure_lattice_energies.json",
51+
title="X23 Lattice Energies",
52+
x_label="Predicted lattice energy / kJ/mol",
53+
y_label="Reference lattice energy / kJ/mol",
54+
hoverdata={
55+
"System": get_system_names(),
56+
},
57+
)
58+
def lattice_energies() -> dict[str, list]:
59+
"""
60+
Get lattice energies for all X23 systems.
61+
62+
Returns
63+
-------
64+
dict[str, list]
65+
Dictionary of reference and predicted lattice energies.
66+
"""
67+
results = {"ref": []} | {mlip: [] for mlip in MODELS}
68+
ref_stored = False
69+
70+
for model_name in MODELS:
71+
model_dir = CALC_PATH / model_name
72+
73+
if not model_dir.exists():
74+
continue
75+
76+
xyz_files = sorted(model_dir.glob("*.xyz"))
77+
if not xyz_files:
78+
continue
79+
80+
for xyz_file in xyz_files:
81+
structs = read(xyz_file, index=":")
82+
83+
solid_energy = structs[0].get_potential_energy()
84+
num_molecules = structs[0].info["num_molecules"]
85+
system = structs[0].info["system"]
86+
molecule_energy = structs[1].get_potential_energy()
87+
88+
lattice_energy = (solid_energy / num_molecules) - molecule_energy
89+
results[model_name].append(lattice_energy * EV_TO_KJ_PER_MOL)
90+
91+
# Copy individual structure files to app data directory
92+
structs_dir = OUT_PATH / model_name
93+
structs_dir.mkdir(parents=True, exist_ok=True)
94+
write(structs_dir / f"{system}.xyz", structs)
95+
96+
# Store reference energies (only once)
97+
if not ref_stored:
98+
results["ref"].append(structs[0].info["ref"])
99+
100+
ref_stored = True
101+
102+
return results
103+
104+
105+
@pytest.fixture
106+
def x23_errors(lattice_energies) -> dict[str, float]:
107+
"""
108+
Get mean absolute error for lattice energies.
109+
110+
Parameters
111+
----------
112+
lattice_energies
113+
Dictionary of reference and predicted lattice energies.
114+
115+
Returns
116+
-------
117+
dict[str, float]
118+
Dictionary of predicted lattice energy errors for all models.
119+
"""
120+
results = {}
121+
for model_name in MODELS:
122+
if lattice_energies[model_name]:
123+
results[model_name] = mae(
124+
lattice_energies["ref"], lattice_energies[model_name]
125+
)
126+
else:
127+
results[model_name] = None
128+
return results
129+
130+
131+
@pytest.fixture
132+
@build_table(
133+
filename=OUT_PATH / "x23_metrics_table.json",
134+
metric_tooltips={
135+
"Model": "Name of the model",
136+
"MAE": "Mean Absolute Error for all systems (kJ/mol)",
137+
},
138+
thresholds=DEFAULT_THRESHOLDS,
139+
)
140+
def metrics(x23_errors: dict[str, float]) -> dict[str, dict]:
141+
"""
142+
Get all X23 metrics.
143+
144+
Parameters
145+
----------
146+
x23_errors
147+
Mean absolute errors for all systems.
148+
149+
Returns
150+
-------
151+
dict[str, dict]
152+
Metric names and values for all models.
153+
"""
154+
return {
155+
"MAE": x23_errors,
156+
}
157+
158+
159+
def test_x23(metrics: dict[str, dict]) -> None:
160+
"""
161+
Run X23 test.
162+
163+
Parameters
164+
----------
165+
metrics
166+
All X23 metrics.
167+
"""
168+
return
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Run X23 app."""
2+
3+
from __future__ import annotations
4+
5+
from dash import Dash
6+
from dash.html import Div
7+
8+
from ml_peg.app import APP_ROOT
9+
from ml_peg.app.base_app import BaseApp
10+
from ml_peg.app.utils.build_callbacks import (
11+
plot_from_table_column,
12+
struct_from_scatter,
13+
)
14+
from ml_peg.app.utils.load import read_plot
15+
from ml_peg.models.get_models import get_model_names
16+
from ml_peg.models.models import current_models
17+
18+
MODELS = get_model_names(current_models)
19+
BENCHMARK_NAME = "X23 Lattice Energies"
20+
DOCS_URL = (
21+
"https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_crystal.html#x23"
22+
)
23+
DATA_PATH = APP_ROOT / "data" / "molecular_crystal" / "X23"
24+
25+
26+
class X23App(BaseApp):
27+
"""X23 benchmark app layout and callbacks."""
28+
29+
def register_callbacks(self) -> None:
30+
"""Register callbacks to app."""
31+
scatter = read_plot(
32+
DATA_PATH / "figure_lattice_energies.json",
33+
id=f"{BENCHMARK_NAME}-figure",
34+
)
35+
36+
# Assets dir will be parent directory - individual files for each system
37+
structs_dir = DATA_PATH / MODELS[0]
38+
structs = [
39+
f"assets/molecular_crystal/X23/{MODELS[0]}/{struct_file.stem}.xyz"
40+
for struct_file in sorted(structs_dir.glob("*.xyz"))
41+
]
42+
43+
plot_from_table_column(
44+
table_id=self.table_id,
45+
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
46+
column_to_plot={"MAE": scatter},
47+
)
48+
49+
struct_from_scatter(
50+
scatter_id=f"{BENCHMARK_NAME}-figure",
51+
struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
52+
structs=structs,
53+
mode="struct",
54+
)
55+
56+
57+
def get_app() -> X23App:
58+
"""
59+
Get X23 benchmark app layout and callback registration.
60+
61+
Returns
62+
-------
63+
X23App
64+
Benchmark layout and callback registration.
65+
"""
66+
return X23App(
67+
name=BENCHMARK_NAME,
68+
description="Lattice energies for 23 organic molecular crystals.",
69+
docs_url=DOCS_URL,
70+
table_path=DATA_PATH / "x23_metrics_table.json",
71+
extra_components=[
72+
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
73+
Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
74+
],
75+
)
76+
77+
78+
if __name__ == "__main__":
79+
# Create Dash app
80+
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
81+
82+
# Construct layout and register callbacks
83+
x23_app = get_app()
84+
full_app.layout = x23_app.layout
85+
x23_app.register_callbacks()
86+
87+
# Run app
88+
full_app.run(port=8053, debug=True)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
title: Molecular Crystals
2+
description: Formation energies of molecular crystals
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""Run calculations for X23 tests."""
2+
3+
from __future__ import annotations
4+
5+
from copy import copy
6+
from pathlib import Path
7+
from typing import Any
8+
9+
from ase import units
10+
from ase.io import read, write
11+
import numpy as np
12+
import pytest
13+
14+
from ml_peg.calcs.utils.utils import get_benchmark_data
15+
from ml_peg.models.get_models import load_models
16+
from ml_peg.models.models import current_models
17+
18+
MODELS = load_models(current_models)
19+
20+
DATA_PATH = Path(__file__).parent / "data"
21+
OUT_PATH = Path(__file__).parent / "outputs"
22+
23+
# Unit conversion
24+
EV_TO_KJ_PER_MOL = units.mol / units.kJ
25+
26+
27+
@pytest.mark.parametrize("mlip", MODELS.items())
28+
def test_lattice_energy(mlip: tuple[str, Any]) -> None:
29+
"""
30+
Run X23 lattice energy test.
31+
32+
Parameters
33+
----------
34+
mlip
35+
Name of model use and model to get calculator.
36+
"""
37+
model_name, model = mlip
38+
calc = model.get_calculator()
39+
40+
# Add D3 calculator for this test
41+
calc = model.add_d3_calculator(calc)
42+
43+
# download X23 dataset
44+
lattice_energy_dir = get_benchmark_data("lattice_energy.zip") / "lattice_energy"
45+
46+
with open(lattice_energy_dir / "list") as f:
47+
systems = f.read().splitlines()
48+
49+
for system in systems:
50+
molecule_path = lattice_energy_dir / system / "POSCAR_molecule"
51+
solid_path = lattice_energy_dir / system / "POSCAR_solid"
52+
ref_path = lattice_energy_dir / system / "lattice_energy_DMC"
53+
num_molecules_path = lattice_energy_dir / system / "nmol"
54+
55+
molecule = read(molecule_path, index=0, format="vasp")
56+
molecule.calc = calc
57+
molecule.get_potential_energy()
58+
59+
solid = read(solid_path, index=0, format="vasp")
60+
solid.calc = copy(calc)
61+
solid.get_potential_energy()
62+
63+
ref = np.loadtxt(ref_path)[0]
64+
num_molecules = np.loadtxt(num_molecules_path)
65+
66+
solid.info["ref"] = ref
67+
solid.info["num_molecules"] = num_molecules
68+
solid.info["system"] = system
69+
molecule.info["ref"] = ref
70+
molecule.info["num_molecules"] = num_molecules
71+
molecule.info["system"] = system
72+
73+
# Write output structures
74+
write_dir = OUT_PATH / model_name
75+
write_dir.mkdir(parents=True, exist_ok=True)
76+
write(write_dir / f"{system}.xyz", [solid, molecule])

0 commit comments

Comments
 (0)