Add X23 benchmark (ddmms#118)

ElliottKasoar · web-flow · commit 12d416ee294d · 2025-11-06T14:01:58.000Z
diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst
@@ -9,3 +9,4 @@ Benchmarks
     nebs
     supramolecular
     physicality
+    molecular_crystal
diff --git a/docs/source/user_guide/benchmarks/molecular_crystal.rst b/docs/source/user_guide/benchmarks/molecular_crystal.rst
@@ -0,0 +1,45 @@
+==================
+Molecular Crystals
+==================
+
+X23
+===
+
+Summary
+-------
+
+Performance in predicting lattice energies for 23 molecular crystals.
+
+
+Metrics
+-------
+
+1. Lattice energy error
+
+How accurate lattice energy predictions are.
+
+For each molecular crystal, the lattice energy is calculated as the energy of the solid
+molecular crystal divided by the number of molecules it comprises, minus the energy of
+the isolated molecule. The predicted value is then compared to the reference lattice
+energy.
+
+
+Computational cost
+------------------
+
+Low: tests are likely to take less than a minute to run on CPU.
+
+
+Data availability
+-----------------
+
+Input structures:
+
+* A. M. Reilly and A. Tkatchenko, Understanding the role of vibrations, exact exchange,
+  and many-body van der Waals interactions in the cohesive properties of molecular
+  crystals, The Journal of Chemical Physics 139 (2013).
+
+Reference data:
+
+* Same as input data
+* PBE-D3(BJ)
diff --git a/ml_peg/analysis/molecular_crystal/X23/analyse_X23.py b/ml_peg/analysis/molecular_crystal/X23/analyse_X23.py
@@ -0,0 +1,168 @@
+"""Analyse X23 benchmark."""
+
+from __future__ import annotations
+
+from ase import units
+from ase.io import read, write
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+CALC_PATH = CALCS_ROOT / "molecular_crystal" / "X23" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "molecular_crystal" / "X23"
+
+# Unit conversion
+EV_TO_KJ_PER_MOL = units.mol / units.kJ
+
+DEFAULT_THRESHOLDS = {"MAE": (0.0, 100.0)}
+
+
+def get_system_names() -> list[str]:
+    """
+    Get list of X23 system names.
+
+    Returns
+    -------
+    list[str]
+        List of system names from structure files.
+    """
+    system_names = []
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if model_dir.exists():
+            xyz_files = sorted(model_dir.glob("*.xyz"))
+            if xyz_files:
+                for xyz_file in xyz_files:
+                    atoms = read(xyz_file)
+                    system_names.append(atoms.info["system"])
+                break
+    return system_names
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_lattice_energies.json",
+    title="X23 Lattice Energies",
+    x_label="Predicted lattice energy / kJ/mol",
+    y_label="Reference lattice energy / kJ/mol",
+    hoverdata={
+        "System": get_system_names(),
+    },
+)
+def lattice_energies() -> dict[str, list]:
+    """
+    Get lattice energies for all X23 systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted lattice energies.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+
+        if not model_dir.exists():
+            continue
+
+        xyz_files = sorted(model_dir.glob("*.xyz"))
+        if not xyz_files:
+            continue
+
+        for xyz_file in xyz_files:
+            structs = read(xyz_file, index=":")
+
+            solid_energy = structs[0].get_potential_energy()
+            num_molecules = structs[0].info["num_molecules"]
+            system = structs[0].info["system"]
+            molecule_energy = structs[1].get_potential_energy()
+
+            lattice_energy = (solid_energy / num_molecules) - molecule_energy
+            results[model_name].append(lattice_energy * EV_TO_KJ_PER_MOL)
+
+            # Copy individual structure files to app data directory
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / f"{system}.xyz", structs)
+
+            # Store reference energies (only once)
+            if not ref_stored:
+                results["ref"].append(structs[0].info["ref"])
+
+        ref_stored = True
+
+    return results
+
+
+@pytest.fixture
+def x23_errors(lattice_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for lattice energies.
+
+    Parameters
+    ----------
+    lattice_energies
+        Dictionary of reference and predicted lattice energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted lattice energy errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        if lattice_energies[model_name]:
+            results[model_name] = mae(
+                lattice_energies["ref"], lattice_energies[model_name]
+            )
+        else:
+            results[model_name] = None
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "x23_metrics_table.json",
+    metric_tooltips={
+        "Model": "Name of the model",
+        "MAE": "Mean Absolute Error for all systems (kJ/mol)",
+    },
+    thresholds=DEFAULT_THRESHOLDS,
+)
+def metrics(x23_errors: dict[str, float]) -> dict[str, dict]:
+    """
+    Get all X23 metrics.
+
+    Parameters
+    ----------
+    x23_errors
+        Mean absolute errors for all systems.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": x23_errors,
+    }
+
+
+def test_x23(metrics: dict[str, dict]) -> None:
+    """
+    Run X23 test.
+
+    Parameters
+    ----------
+    metrics
+        All X23 metrics.
+    """
+    return
diff --git a/ml_peg/app/molecular_crystal/X23/app_X23.py b/ml_peg/app/molecular_crystal/X23/app_X23.py
@@ -0,0 +1,88 @@
+"""Run X23 app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "X23 Lattice Energies"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_crystal.html#x23"
+)
+DATA_PATH = APP_ROOT / "data" / "molecular_crystal" / "X23"
+
+
+class X23App(BaseApp):
+    """X23 benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "figure_lattice_energies.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        # Assets dir will be parent directory - individual files for each system
+        structs_dir = DATA_PATH / MODELS[0]
+        structs = [
+            f"assets/molecular_crystal/X23/{MODELS[0]}/{struct_file.stem}.xyz"
+            for struct_file in sorted(structs_dir.glob("*.xyz"))
+        ]
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> X23App:
+    """
+    Get X23 benchmark app layout and callback registration.
+
+    Returns
+    -------
+    X23App
+        Benchmark layout and callback registration.
+    """
+    return X23App(
+        name=BENCHMARK_NAME,
+        description="Lattice energies for 23 organic molecular crystals.",
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "x23_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    # Create Dash app
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+
+    # Construct layout and register callbacks
+    x23_app = get_app()
+    full_app.layout = x23_app.layout
+    x23_app.register_callbacks()
+
+    # Run app
+    full_app.run(port=8053, debug=True)
diff --git a/ml_peg/app/molecular_crystal/molecular_crystal.yml b/ml_peg/app/molecular_crystal/molecular_crystal.yml
@@ -0,0 +1,2 @@
+title: Molecular Crystals
+description: Formation energies of molecular crystals
diff --git a/ml_peg/calcs/molecular_crystal/X23/calc_X23.py b/ml_peg/calcs/molecular_crystal/X23/calc_X23.py
@@ -0,0 +1,76 @@
+"""Run calculations for X23 tests."""
+
+from __future__ import annotations
+
+from copy import copy
+from pathlib import Path
+from typing import Any
+
+from ase import units
+from ase.io import read, write
+import numpy as np
+import pytest
+
+from ml_peg.calcs.utils.utils import get_benchmark_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+DATA_PATH = Path(__file__).parent / "data"
+OUT_PATH = Path(__file__).parent / "outputs"
+
+# Unit conversion
+EV_TO_KJ_PER_MOL = units.mol / units.kJ
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_lattice_energy(mlip: tuple[str, Any]) -> None:
+    """
+    Run X23 lattice energy test.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+    # Add D3 calculator for this test
+    calc = model.add_d3_calculator(calc)
+
+    # download X23 dataset
+    lattice_energy_dir = get_benchmark_data("lattice_energy.zip") / "lattice_energy"
+
+    with open(lattice_energy_dir / "list") as f:
+        systems = f.read().splitlines()
+
+    for system in systems:
+        molecule_path = lattice_energy_dir / system / "POSCAR_molecule"
+        solid_path = lattice_energy_dir / system / "POSCAR_solid"
+        ref_path = lattice_energy_dir / system / "lattice_energy_DMC"
+        num_molecules_path = lattice_energy_dir / system / "nmol"
+
+        molecule = read(molecule_path, index=0, format="vasp")
+        molecule.calc = calc
+        molecule.get_potential_energy()
+
+        solid = read(solid_path, index=0, format="vasp")
+        solid.calc = copy(calc)
+        solid.get_potential_energy()
+
+        ref = np.loadtxt(ref_path)[0]
+        num_molecules = np.loadtxt(num_molecules_path)
+
+        solid.info["ref"] = ref
+        solid.info["num_molecules"] = num_molecules
+        solid.info["system"] = system
+        molecule.info["ref"] = ref
+        molecule.info["num_molecules"] = num_molecules
+        molecule.info["system"] = system
+
+        # Write output structures
+        write_dir = OUT_PATH / model_name
+        write_dir.mkdir(parents=True, exist_ok=True)
+        write(write_dir / f"{system}.xyz", [solid, molecule])

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+title: Molecular Crystals`
	`2`	`+description: Formation energies of molecular crystals`