Skip to content

Commit 1873989

Browse files
authored
Migrate md (#261)
* initial migration, restructuring, etc. * updating Rg, modularizing MDOutputs.normalize * further work on MDOutputs.normalize * save for testing * fix compile errors and restructure get_mol_rgs * rename model to method, results to outputs, time to simulationoutputs throughout * clean MD imports * more renaming * reverted outputs renaming to results * fix remaining tests * reinsert final_outputs in tests and comment for now * working old h5md tests * some renaming and refinement * fix mypy error * more mypy errors * ruff * ruff format * add MDAnalysis as dependency * start review fixes * fix imports and tests * add basic md tests * fix mypy errors * ruff * fix ruff version * try better typing for bond_list * ruff * pytest_asyncio dep * fix 3.11 mypy * proxy type instead of cond import * finish review edits * updated beyond_hf workflows naming * missed updates * mypy fix * further fix num settings * improve optional dep treatment in utils * done with review of util MD * done with workflow/md.py * comment out archive_to_universe till updated * ruff * reimplement dummy archive to universe * diff fix * replace missing comment * pull max mol warn out of loop * small code formatting * test cond dep * generalize get_diffusion_constant * add typed dicts to results * update cond import approach for MD schema * mypy adjustments * ruff format
1 parent 4202a42 commit 1873989

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3604
-256
lines changed

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,16 @@ dependencies = [
3939
"Documentation" = "https://nomad-coe.github.io/nomad-simulations/"
4040

4141
[project.optional-dependencies]
42+
md = [
43+
"MDAnalysis>=2.4.0",
44+
"networkx"
45+
]
4246
dev = [
4347
'mypy>=1.15',
4448
'pytest>= 5.3.0, <8',
4549
'pytest-timeout>=1.4.2',
4650
'pytest-cov>=2.7.1',
51+
'pytest-asyncio>=0.21.0',
4752
'ruff>=0.6',
4853
'structlog>=1.0',
4954
'typing-extensions>=4.12',

src/nomad_simulations/schema_packages/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
from nomad.metainfo import Datetime, Quantity
55

66

7-
class Time(ArchiveSection):
7+
class SimulationTime(ArchiveSection):
88
"""
99
Contains time-related quantities.
1010
"""
1111

1212
datetime_end = Quantity(
1313
type=Datetime,
1414
description="""
15-
The date and time when this computation ended.
15+
The date and time when the computation ended.
1616
""",
1717
a_eln=ELNAnnotation(component='DateTimeEditQuantity'),
1818
)
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import numpy as np
2+
from nomad.datamodel.data import ArchiveSection
3+
from nomad.metainfo import MEnum, Quantity, Section, SubSection
4+
5+
6+
# TODO This entire module is a prototype, to be tested and refined
7+
class ErrorEstimate(ArchiveSection):
    """
    A generic container for uncertainty/error information associated with a PhysicalProperty.

    Supports:
    - Scalar or array errors (aligned to the property's `value` shape).
    - Confidence/prediction intervals.
    - Named metrics (std, stderr, RMSE, MAE, ...).
    - Method/provenance metadata (bootstrap, jackknife, analytical, validation).
    """

    # What kind of measure is this?
    metric = Quantity(
        type=MEnum(
            'std',
            'stderr',
            'variance',
            'rmse',
            'mae',
            'mape',
            'ci',  # confidence interval
            'pi',  # prediction interval
            'iqr',
            'mad',
            'systematic_bias',
            'model_uncertainty',
            'other',
        ),
        description="""
        The type of error or uncertainty metric being reported.

        Allowed values are:

        | Value             | Description                                                                 |
        |-------------------|-----------------------------------------------------------------------------|
        | `"std"`           | Standard deviation of the observable.                                       |
        | `"stderr"`        | Standard error of the mean (std / √N).                                      |
        | `"variance"`      | Variance of the observable (σ²).                                            |
        | `"rmse"`          | Root-mean-square error between predictions and reference values.            |
        | `"mae"`           | Mean absolute error between predictions and reference values.               |
        | `"mape"`          | Mean absolute percentage error, expressed relative to reference values.     |
        | `"ci"`            | Confidence interval for the observable, typically with a specified level.   |
        | `"pi"`            | Prediction interval for new observations.                                   |
        | `"iqr"`           | Interquartile range (Q3 – Q1).                                              |
        | `"mad"`           | Median absolute deviation (robust alternative to standard deviation).       |
        | `"systematic_bias"` | Estimated systematic offset (bias) between observed and true values.        |
        | `"model_uncertainty"` | Uncertainty arising from the model itself (e.g., ML predictive spread).     |
        | `"other"`         | A different metric not covered above; further specified in `notes` or `definition_iri`. |
        """,
    )

    # Optional URI to a formal definition (VIM/GUM, CODATA, or internal ontology)
    definition_iri = Quantity(
        type=str, description='IRI/URL pointing to a formal metric definition.'
    )

    # How the estimate was computed (e.g., "bootstrap", "jackknife", "analytical")
    method = Quantity(
        type=str,
        description='Computation method for the estimate (e.g., bootstrap, jackknife, analytical).',
    )

    n_samples = Quantity(
        type=np.int32,
        description='Number of samples used to compute the estimate (if applicable).',
    )

    # Scope clarifies where this error applies
    scope = Quantity(
        type=MEnum('global', 'per_value', 'per_component', 'per_entity'),
        description="""
        The application scope of the estimate:
        - global: single number applies to the whole property;
        - per_value: array aligned with the property's value array;
        - per_component: aligned with a named component axis (see `component_axis`);
        - per_entity: aligned with referenced entities.
        """,
    )

    # If scope == per_component, name the axis (e.g., "spin", "kpoint", "band", "species")
    component_axis = Quantity(
        type=str,
        description='Name of the component axis this estimate aligns to (used with scope=per_component).',
    )

    # Scalar/array error value (std, stderr, rmse, mae, etc.)
    value = Quantity(
        type=np.float64,
        shape=['*'],  # allow scalar (len 1) or arbitrary flatten/broadcast
        description='Error/uncertainty values for metrics such as std, stderr, rmse, mae, etc.',
    )

    # Intervals (confidence or prediction)
    interval_type = Quantity(
        type=MEnum('confidence', 'prediction'),
        description='Type of interval if an interval is provided.',
    )

    level = Quantity(
        type=np.float64, description='Interval level (e.g., 0.95 for 95% intervals).'
    )

    lower = Quantity(
        type=np.float64,
        shape=['*'],
        description='Lower bound of the interval (scalar or array aligned to the target).',
    )

    upper = Quantity(
        type=np.float64,
        shape=['*'],
        description='Upper bound of the interval (scalar or array aligned to the target).',
    )

    # Estimated systematic offset; no unit is declared on this quantity, so the
    # values are expected to follow the units of the property they refer to.
    bias = Quantity(
        type=np.float64,
        shape=['*'],
        description='Estimated systematic bias (scalar or array).',
    )

    # Free-form notes (e.g., cross-validation split, dataset, calibration model, etc.)
    notes = Quantity(
        type=str, description='Free-text provenance or remarks about the estimate.'
    )

    def normalize(self, archive, logger) -> None:
        """
        Run generic consistency checks on the stored error estimate.

        Nothing is modified or raised: every inconsistency is only reported
        through `logger`, using messages that never contain stored values.

        Args:
            archive: The archive being normalized; forwarded to the parent class.
            logger: Logger that receives the warnings and the scope hint.
        """
        # Keep the normalization chain of the parent section(s) intact.
        super().normalize(archive, logger)

        # Interval type implied by an interval metric; None for every other
        # metric, including an unset one.
        implied_interval_type = {'ci': 'confidence', 'pi': 'prediction'}.get(
            self.metric
        )

        # Basic metric/interval consistency checks (generic, variable-free messages)
        if implied_interval_type is not None:
            if self.interval_type is None:
                logger.warning(
                    'Interval-type metric is used without specifying an interval type.'
                )
            elif self.interval_type != implied_interval_type:
                logger.warning(
                    'Interval type does not match the interval metric in use.'
                )
        elif self.interval_type is not None and self.metric != 'other':
            logger.warning(
                'Interval type is set but the metric is not an interval metric.'
            )

        # Level sanity (if provided)
        if self.level is not None and not (0.0 < self.level < 1.0):
            logger.warning(
                'Interval level is outside the typical open interval (0, 1).'
            )

        # Interval completeness and internal consistency
        has_lower = self.lower is not None
        has_upper = self.upper is not None
        if has_lower != has_upper:
            logger.warning(
                'Only one interval bound is provided; both lower and upper are recommended.'
            )
        elif has_lower:
            lower_bounds = np.asarray(self.lower)
            upper_bounds = np.asarray(self.upper)
            if lower_bounds.shape != upper_bounds.shape:
                logger.warning('Lower and upper interval bounds differ in shape.')
            elif np.any(lower_bounds > upper_bounds):
                logger.warning(
                    'At least one lower interval bound exceeds its upper bound.'
                )

        # Scope hints
        if self.scope is None:
            logger.info(
                'No scope specified for the error estimate; default interpretation may apply.'
            )

        # Shape alignment against the owning property is intentionally not checked
        # here (no values in logs); property-aware checks may later be added in
        # PhysicalProperty.normalize if needed.

src/nomad_simulations/schema_packages/general.py

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from nomad_simulations.schema_packages.outputs import Outputs
2323
from nomad_simulations.schema_packages.utils import get_composition, log
2424

25-
from .common import Time
25+
from .common import SimulationTime
2626

2727
configuration = config.get_plugin_entry_point(
2828
'nomad_simulations.schema_packages:nomad_simulations_plugin'
@@ -115,7 +115,7 @@ class Program(Entity):
115115
)
116116

117117

118-
class BaseSimulation(Activity, Time):
118+
class BaseSimulation(Activity, SimulationTime):
119119
"""
120120
A computational simulation that produces output data from a given input model system
121121
and input methodological parameters.
@@ -129,45 +129,6 @@ class BaseSimulation(Activity, Time):
129129
links=['https://liusemweb.github.io/mdo/core/1.1/index.html#Calculation']
130130
)
131131

132-
datetime_end = Quantity(
133-
type=Datetime,
134-
description="""
135-
The date and time when this computation ended.
136-
""",
137-
)
138-
139-
cpu1_start = Quantity(
140-
type=np.float64,
141-
unit='second',
142-
description="""
143-
The starting time of the computation on the (first) CPU 1.
144-
""",
145-
)
146-
147-
cpu1_end = Quantity(
148-
type=np.float64,
149-
unit='second',
150-
description="""
151-
The end time of the computation on the (first) CPU 1.
152-
""",
153-
)
154-
155-
wall_start = Quantity(
156-
type=np.float64,
157-
unit='second',
158-
description="""
159-
The internal wall-clock time from the starting of the computation.
160-
""",
161-
)
162-
163-
wall_end = Quantity(
164-
type=np.float64,
165-
unit='second',
166-
description="""
167-
The internal wall-clock time from the end of the computation.
168-
""",
169-
)
170-
171132
program = SubSection(sub_section=Program.m_def, repeats=False)
172133

173134

src/nomad_simulations/schema_packages/model_system.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1500,7 +1500,7 @@ def get_bond_list(self) -> np.ndarray:
15001500
if self._cache.get('bond_list') is not None:
15011501
return self._cache['bond_list']
15021502

1503-
bond_list = np.empty((0, 2), dtype=np.int32)
1503+
bond_list: np.ndarray = np.empty((0, 2), dtype=np.int32)
15041504
# root
15051505
if self.is_root_system():
15061506
bond_list = self.bond_list if self.bond_list is not None else bond_list

src/nomad_simulations/schema_packages/numerical_settings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,9 @@ def resolve_points_and_offset(
381381
offset = np.array([0, 0, 0])
382382
elif self.center == 'Monkhorst-Pack':
383383
try:
384-
points = monkhorst_pack(size=self.grid)
385-
offset = get_monkhorst_pack_size_and_offset(kpts=points)[-1]
384+
points_array = monkhorst_pack(size=self.grid)
385+
points = points_array
386+
offset = get_monkhorst_pack_size_and_offset(kpts=points_array)[-1]
386387
except ValueError:
387388
logger.warning(
388389
'Could not resolve `KMesh.points` and `KMesh.offset` from `KMesh.grid`. ASE `monkhorst_pack` failed.'

src/nomad_simulations/schema_packages/outputs.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,17 @@
2929
Permittivity,
3030
PotentialEnergy,
3131
QuasiparticleWeight,
32+
RadiusOfGyration,
3233
Temperature,
3334
TotalEnergy,
3435
TotalForce,
3536
XASSpectrum,
3637
)
3738

38-
from .common import Time
39+
from .common import SimulationTime
3940

4041

41-
class Outputs(Time):
42+
class Outputs(SimulationTime):
4243
"""
4344
Output properties of a simulation. This base class can be used for inheritance in any of the output properties
4445
defined in this schema.
@@ -64,32 +65,28 @@ class Outputs(Time):
6465
""",
6566
)
6667

68+
absorption_spectra = SubSection(sub_section=AbsorptionSpectrum.m_def, repeats=True)
69+
6770
chemical_potentials = SubSection(sub_section=ChemicalPotential.m_def, repeats=True)
6871

6972
crystal_field_splittings = SubSection(
7073
sub_section=CrystalFieldSplitting.m_def, repeats=True
7174
)
7275

73-
hopping_matrices = SubSection(sub_section=HoppingMatrix.m_def, repeats=True)
76+
electronic_band_gaps = SubSection(sub_section=ElectronicBandGap.m_def, repeats=True)
7477

75-
electronic_eigenvalues = SubSection(
76-
sub_section=ElectronicEigenvalues.m_def, repeats=True
78+
electronic_band_structures = SubSection(
79+
sub_section=ElectronicBandStructure.m_def, repeats=True
7780
)
7881

79-
electronic_band_gaps = SubSection(sub_section=ElectronicBandGap.m_def, repeats=True)
80-
8182
electronic_dos = SubSection(
8283
sub_section=ElectronicDensityOfStates.m_def, repeats=True
8384
)
8485

85-
fermi_surfaces = SubSection(sub_section=FermiSurface.m_def, repeats=True)
86-
87-
electronic_band_structures = SubSection(
88-
sub_section=ElectronicBandStructure.m_def, repeats=True
86+
electronic_eigenvalues = SubSection(
87+
sub_section=ElectronicEigenvalues.m_def, repeats=True
8988
)
9089

91-
occupancies = SubSection(sub_section=Occupancy.m_def, repeats=True)
92-
9390
electronic_greens_functions = SubSection(
9491
sub_section=ElectronicGreensFunction.m_def, repeats=True
9592
)
@@ -98,29 +95,35 @@ class Outputs(Time):
9895
sub_section=ElectronicSelfEnergy.m_def, repeats=True
9996
)
10097

98+
fermi_surfaces = SubSection(sub_section=FermiSurface.m_def, repeats=True)
99+
100+
hopping_matrices = SubSection(sub_section=HoppingMatrix.m_def, repeats=True)
101+
101102
hybridization_functions = SubSection(
102103
sub_section=HybridizationFunction.m_def, repeats=True
103104
)
104105

105-
quasiparticle_weights = SubSection(
106-
sub_section=QuasiparticleWeight.m_def, repeats=True
107-
)
106+
kinetic_energies = SubSection(sub_section=KineticEnergy.m_def, repeats=True)
107+
108+
occupancies = SubSection(sub_section=Occupancy.m_def, repeats=True)
108109

109110
permittivities = SubSection(sub_section=Permittivity.m_def, repeats=True)
110111

111-
absorption_spectra = SubSection(sub_section=AbsorptionSpectrum.m_def, repeats=True)
112+
potential_energies = SubSection(sub_section=PotentialEnergy.m_def, repeats=True)
112113

113-
xas_spectra = SubSection(sub_section=XASSpectrum.m_def, repeats=True)
114+
quasiparticle_weights = SubSection(
115+
sub_section=QuasiparticleWeight.m_def, repeats=True
116+
)
114117

115-
total_energies = SubSection(sub_section=TotalEnergy.m_def, repeats=True)
118+
radii_of_gyration = SubSection(sub_section=RadiusOfGyration.m_def, repeats=True)
116119

117-
kinetic_energies = SubSection(sub_section=KineticEnergy.m_def, repeats=True)
120+
temperatures = SubSection(sub_section=Temperature.m_def, repeats=True)
118121

119-
potential_energies = SubSection(sub_section=PotentialEnergy.m_def, repeats=True)
122+
total_energies = SubSection(sub_section=TotalEnergy.m_def, repeats=True)
120123

121124
total_forces = SubSection(sub_section=TotalForce.m_def, repeats=True)
122125

123-
temperatures = SubSection(sub_section=Temperature.m_def, repeats=True)
126+
xas_spectra = SubSection(sub_section=XASSpectrum.m_def, repeats=True)
124127

125128
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
126129
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

0 commit comments

Comments
 (0)