Skip to content

Commit

Permalink
NEED CONFIRM: deprecate read_pattern, rename to _parse_pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielYang59 committed Nov 9, 2024
1 parent 0184d9c commit 404ed50
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 40 deletions.
81 changes: 45 additions & 36 deletions src/pymatgen/io/vasp/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from typing import TYPE_CHECKING, cast

import numpy as np
from monty.dev import deprecated
from monty.io import reverse_readfile, zopen
from monty.json import MSONable, jsanitize
from monty.os.path import zpath
Expand Down Expand Up @@ -2140,8 +2141,8 @@ def __init__(self, filename: PathLike) -> None:
self.final_fr_energy = e_fr_energy
self.data: dict = {}

# Read "total number of plane waves", NPLWV:
self.read_pattern(
# Read NPLWV (total number of plane waves)
self._parse_pattern(
{"nplwv": r"total plane-waves NPLWV =\s+(\*{6}|\d+)"},
terminate_on_match=True,
)
Expand All @@ -2152,7 +2153,7 @@ def __init__(self, filename: PathLike) -> None:

nplwvs_at_kpoints = [
n
for [n] in self.read_table_pattern(
for [n] in self._parse_table_pattern(
r"\n{3}-{104}\n{3}",
r".+plane waves:\s+(\*{6,}|\d+)",
(
Expand All @@ -2172,23 +2173,23 @@ def __init__(self, filename: PathLike) -> None:
pass

# Read the drift
self.read_pattern(
self._parse_pattern(
{"drift": r"total drift:\s+([\.\-\d]+)\s+([\.\-\d]+)\s+([\.\-\d]+)"},
terminate_on_match=False,
postprocess=float,
)
self.drift = self.data.get("drift", [])

# Check if calculation is spin polarized
self.read_pattern({"spin": r"ISPIN\s*=\s*2"})
self._parse_pattern({"spin": r"ISPIN\s*=\s*2"})
self.spin = bool(self.data.get("spin", []))

# Check if calculation is non-collinear
self.read_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"})
self._parse_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"})
self.noncollinear = bool(self.data.get("noncollinear", []))

# Check if the calculation type is DFPT
self.read_pattern(
self._parse_pattern(
{"ibrion": r"IBRION =\s+([\-\d]+)"},
terminate_on_match=True,
postprocess=int,
Expand All @@ -2200,7 +2201,7 @@ def __init__(self, filename: PathLike) -> None:
self.dfpt = False

# Check if LEPSILON is True and read piezo data if so
self.read_pattern({"epsilon": r"LEPSILON\s*=\s*T"})
self._parse_pattern({"epsilon": r"LEPSILON\s*=\s*T"})
if self.data.get("epsilon", []):
self.lepsilon = True
self.read_lepsilon()
Expand All @@ -2211,7 +2212,7 @@ def __init__(self, filename: PathLike) -> None:
self.lepsilon = False

# Check if LCALCPOL is True and read polarization data if so
self.read_pattern({"calcpol": r"LCALCPOL\s*=\s*T"})
self._parse_pattern({"calcpol": r"LCALCPOL\s*=\s*T"})
if self.data.get("calcpol", []):
self.lcalcpol = True
self.read_lcalcpol()
Expand All @@ -2223,11 +2224,11 @@ def __init__(self, filename: PathLike) -> None:
self.electrostatic_potential: list[float] | None = None
self.ngf = None
self.sampling_radii: list[float] | None = None
self.read_pattern({"electrostatic": r"average \(electrostatic\) potential at core"})
self._parse_pattern({"electrostatic": r"average \(electrostatic\) potential at core"})
if self.data.get("electrostatic", []):
self.read_electrostatic_potential()

self.read_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"})
self._parse_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"})
if self.data.get("nmr_cs"):
self.nmr_cs = True
self.read_chemical_shielding()
Expand All @@ -2237,15 +2238,15 @@ def __init__(self, filename: PathLike) -> None:
else:
self.nmr_cs = False

self.read_pattern({"nmr_efg": r"NMR quadrupolar parameters"})
self._parse_pattern({"nmr_efg": r"NMR quadrupolar parameters"})
if self.data.get("nmr_efg"):
self.nmr_efg = True
self.read_nmr_efg()
self.read_nmr_efg_tensor()
else:
self.nmr_efg = False

self.read_pattern(
self._parse_pattern(
{"has_onsite_density_matrices": r"onsite density matrix"},
terminate_on_match=True,
)
Expand All @@ -2270,9 +2271,9 @@ def __init__(self, filename: PathLike) -> None:
"Ediel_sol",
):
if key == "PAW double counting":
self.read_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"})
self._parse_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"})
else:
self.read_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"})
self._parse_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"})
if not self.data[key]:
continue
final_energy_contribs[key] = sum(map(float, self.data[key][-1]))
Expand All @@ -2295,15 +2296,15 @@ def _parse_sci_notation(line: str) -> list[float]:
return [float(t) for t in match]
return []

def read_pattern(
def _parse_pattern(
self,
patterns: dict[str, str],
reverse: bool = False,
terminate_on_match: bool = False,
postprocess: Callable = str,
) -> None:
r"""
General pattern reading. Use monty's regrep method and take the same
General pattern parser. Use monty's regrep method and take the same
arguments.
Args:
Expand Down Expand Up @@ -2334,7 +2335,7 @@ def read_pattern(
for key in patterns:
self.data[key] = [i[0] for i in matches.get(key, [])]

def read_table_pattern(
def _parse_table_pattern(
self,
header_pattern: str,
row_pattern: str,
Expand Down Expand Up @@ -2411,6 +2412,14 @@ def read_table_pattern(
self.data[attribute_name] = retained_data
return retained_data

@deprecated(_parse_pattern)
def read_pattern(self, *args, **kwargs):
self._parse_pattern(*args, **kwargs)

@deprecated(_parse_table_pattern)
def read_table_pattern(self, *args, **kwargs):
return self._parse_table_pattern(*args, **kwargs)

def as_dict(self) -> dict[str, Any]:
"""MSONable dict."""
dct = {
Expand Down Expand Up @@ -2488,18 +2497,18 @@ def read_electrostatic_potential(self) -> None:
electrostatic_potential:
"""
pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"}
self.read_pattern(pattern, postprocess=int)
self._parse_pattern(pattern, postprocess=int)
self.ngf = self.data.get("ngf", [[]])[0]

pattern = {"radii": r"the test charge radii are((?:\s+[\.\-\d]+)+)"}
self.read_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str)
self._parse_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str)
self.sampling_radii = [*map(float, self.data["radii"][0][0].split())]

header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)"
table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)"
footer_pattern = r"\s+E-fermi :"

pots: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern)
pots: list = self._parse_table_pattern(header_pattern, table_pattern, footer_pattern)
_pots: str = "".join(itertools.chain.from_iterable(pots))

pots = re.findall(r"\s+\d+\s*([\.\-\d]+)+", _pots)
Expand Down Expand Up @@ -2591,11 +2600,11 @@ def read_chemical_shielding(self) -> None:
row_pattern = r"\d+(?:\s+[-]?\d+\.\d+){3}\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3)
footer_pattern = r"-{50,}\s*$"
h1 = header_pattern + first_part_pattern
cs_valence_only = self.read_table_pattern(
cs_valence_only = self._parse_table_pattern(
h1, row_pattern, footer_pattern, postprocess=float, last_one_only=True
)
h2 = header_pattern + swallon_valence_body_pattern
cs_valence_and_core = self.read_table_pattern(
cs_valence_and_core = self._parse_table_pattern(
h2, row_pattern, footer_pattern, postprocess=float, last_one_only=True
)
self.data["chemical_shielding"] = {
Expand All @@ -2617,7 +2626,7 @@ def read_cs_g0_contribution(self) -> None:
)
row_pattern = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3)
footer_pattern = r"\s+-{50,}\s*$"
self.read_table_pattern(
self._parse_table_pattern(
header_pattern,
row_pattern,
footer_pattern,
Expand All @@ -2635,7 +2644,7 @@ def read_cs_core_contribution(self) -> None:
header_pattern = r"^\s+Core NMR properties\s*$\n\n^\s+typ\s+El\s+Core shift \(ppm\)\s*$\n^\s+-{20,}$\n"
row_pattern = r"\d+\s+(?P<element>[A-Z][a-z]?\w?)\s+(?P<shift>[-]?\d+\.\d+)"
footer_pattern = r"\s+-{20,}\s*$"
self.read_table_pattern(
self._parse_table_pattern(
header_pattern,
row_pattern,
footer_pattern,
Expand Down Expand Up @@ -2696,7 +2705,7 @@ def read_nmr_efg_tensor(self) -> list[NDArray]:
row_pattern = r"\d+\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)"
footer_pattern = r"-*\n"

data = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float)
data = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float)
tensors = [make_symmetric_matrix_from_upper_tri(d) for d in data]
self.data["unsym_efg_tensor"] = tensors
return tensors
Expand All @@ -2721,7 +2730,7 @@ def read_nmr_efg(self) -> None:
r"\d+\s+(?P<cq>[-]?\d+\.\d+)\s+(?P<eta>[-]?\d+\.\d+)\s+(?P<nuclear_quadrupole_moment>[-]?\d+\.\d+)"
)
footer_pattern = r"-{50,}\s*$"
self.read_table_pattern(
self._parse_table_pattern(
header_pattern,
row_pattern,
footer_pattern,
Expand All @@ -2740,7 +2749,7 @@ def read_elastic_tensor(self) -> None:
header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s+Direction\s+([X-Z][X-Z]\s+)+\-+"
row_pattern = r"[X-Z][X-Z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6)
footer_pattern = r"\-+"
et_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float)
et_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float)
self.data["elastic_tensor"] = et_table

def read_piezo_tensor(self) -> None:
Expand All @@ -2752,7 +2761,7 @@ def read_piezo_tensor(self) -> None:
header_pattern = r"PIEZOELECTRIC TENSOR for field in x, y, z\s+\(C/m\^2\)\s+([X-Z][X-Z]\s+)+\-+"
row_pattern = r"[x-z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6)
footer_pattern = r"BORN EFFECTIVE"
pt_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float)
pt_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float)
self.data["piezo_tensor"] = pt_table

def read_onsite_density_matrices(self) -> None:
Expand All @@ -2766,7 +2775,7 @@ def read_onsite_density_matrices(self) -> None:
header_pattern = r"spin component 1\n"
row_pattern = r"[^\S\r\n]*(?:(-?[\d.]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?"
footer_pattern = r"\nspin component 2"
spin1_component = self.read_table_pattern(
spin1_component = self._parse_table_pattern(
header_pattern,
row_pattern,
footer_pattern,
Expand All @@ -2781,7 +2790,7 @@ def read_onsite_density_matrices(self) -> None:
header_pattern = r"spin component 2\n"
row_pattern = r"[^\S\r\n]*(?:([\d.-]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?"
footer_pattern = r"\n occupancies and eigenvectors"
spin2_component = self.read_table_pattern(
spin2_component = self._parse_table_pattern(
header_pattern,
row_pattern,
footer_pattern,
Expand Down Expand Up @@ -2810,7 +2819,7 @@ def read_corrections(
dipol_quadrupol_correction: TODO: fill details.
"""
patterns = {"dipol_quadrupol_correction": r"dipol\+quadrupol energy correction\s+([\d\-\.]+)"}
self.read_pattern(
self._parse_pattern(
patterns,
reverse=reverse,
terminate_on_match=terminate_on_match,
Expand Down Expand Up @@ -2845,7 +2854,7 @@ def read_neb(
"tangent_force": r"(NEB: projections on to tangent \(spring, REAL\)\s+\S+|tangential force \(eV/A\))\s+"
r"([\d\-\.]+)",
}
self.read_pattern(
self._parse_pattern(
patterns,
reverse=reverse,
terminate_on_match=terminate_on_match,
Expand Down Expand Up @@ -3552,7 +3561,7 @@ def read_fermi_contact_shift(self) -> None:
)
row_pattern1 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 5)
footer_pattern = r"\-+"
fch_table = self.read_table_pattern(
fch_table = self._parse_table_pattern(
header_pattern1,
row_pattern1,
footer_pattern,
Expand All @@ -3568,7 +3577,7 @@ def read_fermi_contact_shift(self) -> None:
r"\s*\-+"
)
row_pattern2 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 6)
dh_table = self.read_table_pattern(
dh_table = self._parse_table_pattern(
header_pattern2,
row_pattern2,
footer_pattern,
Expand All @@ -3585,7 +3594,7 @@ def read_fermi_contact_shift(self) -> None:
r"\s*\-+"
)
row_pattern3 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 4)
th_table = self.read_table_pattern(
th_table = self._parse_table_pattern(
header_pattern3,
row_pattern3,
footer_pattern,
Expand Down
8 changes: 4 additions & 4 deletions tests/io/vasp/test_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1382,21 +1382,21 @@ def test_energies(self):
assert outcar.final_energy_wo_entrp == approx(-15.83863167)
assert outcar.final_fr_energy == approx(-15.92115453)

def test_read_table_pattern(self):
def test_parse_table_pattern(self):
outcar = Outcar(f"{VASP_OUT_DIR}/OUTCAR.gz")

header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)"
table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)"
footer_pattern = r"\s+E-fermi :"

pots = outcar.read_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True)
pots = outcar._parse_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True)
ref_last = [
[" 1 -26.0704 2 -45.5046 3 -45.5046 4 -72.9539 5 -73.0621"],
[" 6 -72.9539 7 -73.0621"],
]
assert pots == ref_last

pots = outcar.read_table_pattern(
pots = outcar._parse_table_pattern(
header_pattern,
table_pattern,
footer_pattern,
Expand All @@ -1413,7 +1413,7 @@ def test_read_table_pattern(self):
ValueError,
match="last_one_only and first_one_only options are incompatible",
):
outcar.read_table_pattern(
outcar._parse_table_pattern(
header_pattern,
table_pattern,
footer_pattern,
Expand Down

0 comments on commit 404ed50

Please sign in to comment.