diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 7baac23bb82..98119af8fe9 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, cast import numpy as np +from monty.dev import deprecated from monty.io import reverse_readfile, zopen from monty.json import MSONable, jsanitize from monty.os.path import zpath @@ -2140,8 +2141,8 @@ def __init__(self, filename: PathLike) -> None: self.final_fr_energy = e_fr_energy self.data: dict = {} - # Read "total number of plane waves", NPLWV: - self.read_pattern( + # Read NPLWV (total number of plane waves) + self._parse_pattern( {"nplwv": r"total plane-waves NPLWV =\s+(\*{6}|\d+)"}, terminate_on_match=True, ) @@ -2152,7 +2153,7 @@ def __init__(self, filename: PathLike) -> None: nplwvs_at_kpoints = [ n - for [n] in self.read_table_pattern( + for [n] in self._parse_table_pattern( r"\n{3}-{104}\n{3}", r".+plane waves:\s+(\*{6,}|\d+)", ( @@ -2172,7 +2173,7 @@ def __init__(self, filename: PathLike) -> None: pass # Read the drift - self.read_pattern( + self._parse_pattern( {"drift": r"total drift:\s+([\.\-\d]+)\s+([\.\-\d]+)\s+([\.\-\d]+)"}, terminate_on_match=False, postprocess=float, @@ -2180,15 +2181,15 @@ def __init__(self, filename: PathLike) -> None: self.drift = self.data.get("drift", []) # Check if calculation is spin polarized - self.read_pattern({"spin": r"ISPIN\s*=\s*2"}) + self._parse_pattern({"spin": r"ISPIN\s*=\s*2"}) self.spin = bool(self.data.get("spin", [])) # Check if calculation is non-collinear - self.read_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"}) + self._parse_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"}) self.noncollinear = bool(self.data.get("noncollinear", [])) # Check if the calculation type is DFPT - self.read_pattern( + self._parse_pattern( {"ibrion": r"IBRION =\s+([\-\d]+)"}, terminate_on_match=True, postprocess=int, @@ -2200,7 +2201,7 @@ def __init__(self, filename: PathLike) -> None: self.dfpt = False # Check if LEPSILON is True and read piezo data if so - self.read_pattern({"epsilon": r"LEPSILON\s*=\s*T"}) + self._parse_pattern({"epsilon": r"LEPSILON\s*=\s*T"}) if self.data.get("epsilon", []): self.lepsilon = True self.read_lepsilon() @@ -2211,7 +2212,7 @@ def __init__(self, filename: PathLike) -> None: self.lepsilon = False # Check if LCALCPOL is True and read polarization data if so - self.read_pattern({"calcpol": r"LCALCPOL\s*=\s*T"}) + self._parse_pattern({"calcpol": r"LCALCPOL\s*=\s*T"}) if self.data.get("calcpol", []): self.lcalcpol = True self.read_lcalcpol() @@ -2223,11 +2224,11 @@ def __init__(self, filename: PathLike) -> None: self.electrostatic_potential: list[float] | None = None self.ngf = None self.sampling_radii: list[float] | None = None - self.read_pattern({"electrostatic": r"average \(electrostatic\) potential at core"}) + self._parse_pattern({"electrostatic": r"average \(electrostatic\) potential at core"}) if self.data.get("electrostatic", []): self.read_electrostatic_potential() - self.read_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"}) + self._parse_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"}) if self.data.get("nmr_cs"): self.nmr_cs = True self.read_chemical_shielding() @@ -2237,7 +2238,7 @@ def __init__(self, filename: PathLike) -> None: else: self.nmr_cs = False - self.read_pattern({"nmr_efg": r"NMR quadrupolar parameters"}) + self._parse_pattern({"nmr_efg": r"NMR quadrupolar parameters"}) if self.data.get("nmr_efg"): self.nmr_efg = True self.read_nmr_efg() @@ -2245,7 +2246,7 @@ def __init__(self, filename: PathLike) -> None: else: self.nmr_efg = False - self.read_pattern( + self._parse_pattern( {"has_onsite_density_matrices": r"onsite density matrix"}, terminate_on_match=True, ) @@ -2270,9 +2271,9 @@ def __init__(self, filename: PathLike) -> None: "Ediel_sol", ): if key == "PAW double counting": - self.read_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"}) + self._parse_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"}) else: - self.read_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"}) + self._parse_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"}) if not self.data[key]: continue final_energy_contribs[key] = sum(map(float, self.data[key][-1])) @@ -2295,7 +2296,7 @@ def _parse_sci_notation(line: str) -> list[float]: return [float(t) for t in match] return [] - def read_pattern( + def _parse_pattern( self, patterns: dict[str, str], reverse: bool = False, @@ -2303,7 +2304,7 @@ def read_pattern( postprocess: Callable = str, ) -> None: r""" - General pattern reading. Use monty's regrep method and take the same + General pattern parser. Use monty's regrep method and take the same arguments. Args: @@ -2334,7 +2335,7 @@ def read_pattern( for key in patterns: self.data[key] = [i[0] for i in matches.get(key, [])] - def read_table_pattern( + def _parse_table_pattern( self, header_pattern: str, row_pattern: str, @@ -2411,6 +2412,14 @@ def read_table_pattern( self.data[attribute_name] = retained_data return retained_data + @deprecated(_parse_pattern) + def read_pattern(self, *args, **kwargs): + self._parse_pattern(*args, **kwargs) + + @deprecated(_parse_table_pattern) + def read_table_pattern(self, *args, **kwargs): + return self._parse_table_pattern(*args, **kwargs) + def as_dict(self) -> dict[str, Any]: """MSONable dict.""" dct = { @@ -2488,18 +2497,18 @@ def read_electrostatic_potential(self) -> None: electrostatic_potential: """ pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"} - self.read_pattern(pattern, postprocess=int) + self._parse_pattern(pattern, postprocess=int) self.ngf = self.data.get("ngf", [[]])[0] pattern = {"radii": r"the test charge radii are((?:\s+[\.\-\d]+)+)"} - self.read_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str) + self._parse_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str) self.sampling_radii = [*map(float, self.data["radii"][0][0].split())] header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)" table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pots: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern) + pots: list = self._parse_table_pattern(header_pattern, table_pattern, footer_pattern) _pots: str = "".join(itertools.chain.from_iterable(pots)) pots = re.findall(r"\s+\d+\s*([\.\-\d]+)+", _pots) @@ -2591,11 +2600,11 @@ def read_chemical_shielding(self) -> None: row_pattern = r"\d+(?:\s+[-]?\d+\.\d+){3}\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3) footer_pattern = r"-{50,}\s*$" h1 = header_pattern + first_part_pattern - cs_valence_only = self.read_table_pattern( + cs_valence_only = self._parse_table_pattern( h1, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) h2 = header_pattern + swallon_valence_body_pattern - cs_valence_and_core = self.read_table_pattern( + cs_valence_and_core = self._parse_table_pattern( h2, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) self.data["chemical_shielding"] = { @@ -2617,7 +2626,7 @@ def read_cs_g0_contribution(self) -> None: ) row_pattern = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3) footer_pattern = r"\s+-{50,}\s*$" - self.read_table_pattern( + self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2635,7 +2644,7 @@ def read_cs_core_contribution(self) -> None: header_pattern = r"^\s+Core NMR properties\s*$\n\n^\s+typ\s+El\s+Core shift \(ppm\)\s*$\n^\s+-{20,}$\n" row_pattern = r"\d+\s+(?P[A-Z][a-z]?\w?)\s+(?P[-]?\d+\.\d+)" footer_pattern = r"\s+-{20,}\s*$" - self.read_table_pattern( + self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2696,7 +2705,7 @@ def read_nmr_efg_tensor(self) -> list[NDArray]: row_pattern = r"\d+\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)" footer_pattern = r"-*\n" - data = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + data = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) tensors = [make_symmetric_matrix_from_upper_tri(d) for d in data] self.data["unsym_efg_tensor"] = tensors return tensors @@ -2721,7 +2730,7 @@ def read_nmr_efg(self) -> None: r"\d+\s+(?P[-]?\d+\.\d+)\s+(?P[-]?\d+\.\d+)\s+(?P[-]?\d+\.\d+)" ) footer_pattern = r"-{50,}\s*$" - self.read_table_pattern( + self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2740,7 +2749,7 @@ def read_elastic_tensor(self) -> None: header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s+Direction\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[X-Z][X-Z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"\-+" - et_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + et_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) self.data["elastic_tensor"] = et_table def read_piezo_tensor(self) -> None: @@ -2752,7 +2761,7 @@ def read_piezo_tensor(self) -> None: header_pattern = r"PIEZOELECTRIC TENSOR for field in x, y, z\s+\(C/m\^2\)\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[x-z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"BORN EFFECTIVE" - pt_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + pt_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) self.data["piezo_tensor"] = pt_table def read_onsite_density_matrices(self) -> None: @@ -2766,7 +2775,7 @@ def read_onsite_density_matrices(self) -> None: header_pattern = r"spin component 1\n" row_pattern = r"[^\S\r\n]*(?:(-?[\d.]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?" footer_pattern = r"\nspin component 2" - spin1_component = self.read_table_pattern( + spin1_component = self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2781,7 +2790,7 @@ def read_onsite_density_matrices(self) -> None: header_pattern = r"spin component 2\n" row_pattern = r"[^\S\r\n]*(?:([\d.-]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?" footer_pattern = r"\n occupancies and eigenvectors" - spin2_component = self.read_table_pattern( + spin2_component = self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2810,7 +2819,7 @@ def read_corrections( dipol_quadrupol_correction: TODO: fill details. """ patterns = {"dipol_quadrupol_correction": r"dipol\+quadrupol energy correction\s+([\d\-\.]+)"} - self.read_pattern( + self._parse_pattern( patterns, reverse=reverse, terminate_on_match=terminate_on_match, @@ -2845,7 +2854,7 @@ def read_neb( "tangent_force": r"(NEB: projections on to tangent \(spring, REAL\)\s+\S+|tangential force \(eV/A\))\s+" r"([\d\-\.]+)", } - self.read_pattern( + self._parse_pattern( patterns, reverse=reverse, terminate_on_match=terminate_on_match, @@ -3552,7 +3561,7 @@ def read_fermi_contact_shift(self) -> None: ) row_pattern1 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 5) footer_pattern = r"\-+" - fch_table = self.read_table_pattern( + fch_table = self._parse_table_pattern( header_pattern1, row_pattern1, footer_pattern, @@ -3568,7 +3577,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern2 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 6) - dh_table = self.read_table_pattern( + dh_table = self._parse_table_pattern( header_pattern2, row_pattern2, footer_pattern, @@ -3585,7 +3594,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern3 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 4) - th_table = self.read_table_pattern( + th_table = self._parse_table_pattern( header_pattern3, row_pattern3, footer_pattern, diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index fe8b5ad83dc..7ddf2aa56b7 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -1382,21 +1382,21 @@ def test_energies(self): assert outcar.final_energy_wo_entrp == approx(-15.83863167) assert outcar.final_fr_energy == approx(-15.92115453) - def test_read_table_pattern(self): + def test_parse_table_pattern(self): outcar = Outcar(f"{VASP_OUT_DIR}/OUTCAR.gz") header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)" table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pots = outcar.read_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True) + pots = outcar._parse_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True) ref_last = [ [" 1 -26.0704 2 -45.5046 3 -45.5046 4 -72.9539 5 -73.0621"], [" 6 -72.9539 7 -73.0621"], ] assert pots == ref_last - pots = outcar.read_table_pattern( + pots = outcar._parse_table_pattern( header_pattern, table_pattern, footer_pattern, @@ -1413,7 +1413,7 @@ def test_read_table_pattern(self): ValueError, match="last_one_only and first_one_only options are incompatible", ): - outcar.read_table_pattern( + outcar._parse_table_pattern( header_pattern, table_pattern, footer_pattern,