Skip to content

Commit 6828c25

Browse files
committed
MolecularOrbitals according to TREXIO format
1 parent ed328f3 commit 6828c25

File tree

1 file changed

+183
-0
lines changed

1 file changed

+183
-0
lines changed
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import itertools
2+
from collections.abc import Iterable
3+
from typing import TYPE_CHECKING, Any, Optional
4+
5+
if TYPE_CHECKING:
6+
from nomad.datamodel.datamodel import EntryArchive
7+
from structlog.stdlib import BoundLogger
8+
9+
import numpy as np
10+
from nomad.datamodel.data import ArchiveSection
11+
from nomad.datamodel.metainfo.basesections.v2 import Entity
12+
from nomad.metainfo import URL, MEnum, Quantity, Reference, SectionProxy
13+
14+
from nomad_simulations.schema_packages.physical_property import PhysicalProperty
15+
16+
17+
class MolecularOrbitals(PhysicalProperty):
    """
    Molecular-orbital eigenstates expressed in an atom-centred AO basis.

    Every quantity is either directly mappable to the TREXIO *mo* group or
    provides auxiliary metadata needed by NOMAD tooling. Shapes are expressed
    in Fortran/column-major convention to match TREXIO and most quantum-code
    outputs.

    The TREXIO format:
    Posenitskiy et al., J. Chem. Phys. 158, 174801 (2023)

    Quantities
    ----------
    ``basis_set_ref``       Reference to the AO basis section.
    ``mo_spin``             Per-orbital spin index (TREXIO-style unified list).
    ``n_mo``                Number of molecular orbitals stored.
    ``n_ao``                Size of the AO basis.
    ``mo_energies``         εᵢ orbital energies (eV).
    ``mo_occupations``      nᵢ occupation numbers.
    ``mo_coefficients``     Real part of AO→MO coefficient matrix C.
    ``mo_coefficients_im``  Imaginary part of C (optional).
    ``mo_class``            Role of each MO: Core/Inactive/Active/Virtual/Deleted.
    ``mo_symmetry``         Irreducible-representation labels (e.g. *a₁*, *b₂*).
    ``mo_type``             Classification of entire set: canonical/natural/…

    """

    # ------------------------------------------------------------------ #
    # References                                                         #
    # ------------------------------------------------------------------ #
    basis_set_ref = Quantity(
        type=Reference(SectionProxy('AtomCenteredBasisSet')),
        description="""
        Reference to the atom-centered basis set in which these molecular
        orbitals are expanded.
        """,
    )

    # ------------------------------------------------------------------ #
    # Dimension-defining scalars                                         #
    # ------------------------------------------------------------------ #
    n_mo = Quantity(
        type=np.int32,
        description='Number of molecular orbitals stored.',
    )

    n_ao = Quantity(
        type=np.int32,
        description='Number of atomic orbitals (size of AO basis).',
    )

    # ------------------------------------------------------------------ #
    # Per-orbital mandatory metadata                                     #
    # ------------------------------------------------------------------ #
    mo_spin = Quantity(
        type=np.int32,
        shape=['n_mo'],
        description="""
        Spin index of each molecular orbital: 0 for α-spin, 1 for β-spin.
        """,
    )

    mo_energies = Quantity(
        type=np.float64,
        unit='electron_volt',
        shape=['n_mo'],
        description="""
        Orbital energies for each MO. In a canonical SCF these are the eigenvalues
        of the (Fock) Hamiltonian; in correlated frameworks they may be natural-orbital
        energies or any other chosen set.
        """,
    )

    mo_occupations = Quantity(
        type=np.float64,
        shape=['n_mo'],
        description="""
        Occupation numbers for each MO. Closed-shell codes will typically give 2.0
        for occupied and 0.0 for virtual orbitals; unrestricted codes use two channels.
        """,
    )

    mo_class = Quantity(
        type=MEnum('core', 'inactive', 'active', 'virtual', 'deleted'),
        shape=['n_mo'],
        description="""
        Role of each MO within a correlated calculation or active-space
        protocol:

        * core : energy-frozen doubly-occupied
        * inactive : doubly-occupied but variationally optimised
        * active : part of the active space
        * virtual : unoccupied (correlated) orbital
        * deleted : pruned for technical reasons
        """,
    )

    mo_symmetry = Quantity(
        type=str,
        shape=['n_mo'],
        description="""
        Symmetry label of each MO in the molecule's point group
        (e.g. *a₁*, *b₂u*, *pi_g*). Leave empty for systems with
        no detected symmetry.
        """,
    )

    # ------------------------------------------------------------------ #
    # AO → MO coefficient matrices                                       #
    # ------------------------------------------------------------------ #
    # NOTE(review): the declared shape is (n_mo, n_ao), i.e. one row per MO.
    # TREXIO documents `mo.coefficient` with dimensions written the other way
    # round in its column-major notation -- confirm that parsers fill this
    # quantity in the orientation stated in the description below.
    mo_coefficients = Quantity(
        type=np.float64,
        shape=['n_mo', 'n_ao'],
        description="""
        The AO→MO coefficient matrix **C**, such that
        ψ_i(r) = ∑_μ C[i,μ] φ_μ(r).
        Row index i runs over MOs, column index μ runs over AOs in `basis_set_ref`.
        """,
    )

    mo_coefficients_im = Quantity(
        type=np.float64,
        shape=['n_mo', 'n_ao'],
        description="""
        Imaginary component of the AO→MO coefficient matrix **C**.
        Combine it with `mo_coefficients` to obtain the full complex matrix:
        C_complex = mo_coefficients + 1j * mo_coefficients_im
        Leave this quantity unset (or an empty array) when the wave-function
        is strictly real, as in non-relativistic Γ-point calculations.
        """,
    )

    # ------------------------------------------------------------------ #
    # Whole-set classification                                           #
    # ------------------------------------------------------------------ #
    mo_type = Quantity(
        type=MEnum('canonical', 'natural', 'localized', 'hybrid'),
        default='canonical',
        description="""
        Classification of these orbitals:
        - canonical : standard SCF eigenfunctions
        - natural : eigenfunctions of the 1-RDM
        - localized : after a localization transform (Boys, Pipek-Mezey, …)
        - hybrid : e.g. post-HF (CASSCF) orbitals, etc.
        """,
    )

    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
        """
        Infer `n_mo` / `n_ao` from the supplied arrays when they are absent.

        `n_mo` is taken from the first axis of the coefficient matrix when a
        2D matrix is available, otherwise from the length of the first
        per-orbital array that is set (`mo_spin`, `mo_energies`,
        `mo_occupations`, `mo_class`, `mo_symmetry`, in that order).
        `n_ao` is taken from the second axis of the coefficient matrix.

        Values of `n_mo` / `n_ao` that are already set are never overwritten;
        a warning is logged if they disagree with the coefficient matrix.

        Args:
            archive: The archive being normalized; forwarded to the parent
                class normalizer.
            logger: Logger used to report malformed or inconsistent arrays.
        """
        super().normalize(archive, logger)

        # Find a usable (n_mo, n_ao) matrix. The real part is preferred; the
        # imaginary part is only consulted when the real part cannot be used.
        coeff_shape = None
        for name in ('mo_coefficients', 'mo_coefficients_im'):
            matrix = getattr(self, name)
            if matrix is None:
                continue
            shape = np.shape(matrix)
            if len(shape) == 2:
                coeff_shape = shape
                break
            # An empty array is the documented way of saying "no imaginary
            # part", so only non-empty arrays of the wrong rank are reported.
            if np.size(matrix) > 0:
                logger.warning(
                    f'`{name}` is not a 2D (n_mo, n_ao) array and is ignored '
                    'when inferring `n_mo` / `n_ao`.'
                )

        # ---------- infer n_mo ----------
        if self.n_mo is None:
            if coeff_shape is not None:
                self.n_mo = int(coeff_shape[0])
            else:
                per_orbital = (
                    'mo_spin',
                    'mo_energies',
                    'mo_occupations',
                    'mo_class',
                    'mo_symmetry',
                )
                for name in per_orbital:
                    values = getattr(self, name)
                    if values is not None:
                        self.n_mo = len(values)
                        break
        elif coeff_shape is not None and int(self.n_mo) != coeff_shape[0]:
            logger.warning(
                f'`n_mo` ({int(self.n_mo)}) does not match the first dimension '
                f'of the MO coefficient matrix ({coeff_shape[0]}).'
            )

        # ---------- infer n_ao ----------
        if self.n_ao is None:
            if coeff_shape is not None:
                self.n_ao = int(coeff_shape[1])
        elif coeff_shape is not None and int(self.n_ao) != coeff_shape[1]:
            logger.warning(
                f'`n_ao` ({int(self.n_ao)}) does not match the second dimension '
                f'of the MO coefficient matrix ({coeff_shape[1]}).'
            )

0 commit comments

Comments
 (0)