From 869b006319739c5b4731e2694bb0f7065201257e Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 17:40:54 +0800 Subject: [PATCH 01/16] improve docstring --- src/pymatgen/io/vasp/outputs.py | 185 +++++++++++++++++++------------- 1 file changed, 108 insertions(+), 77 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 7d74758bf89..5cad19905d4 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -1927,51 +1927,54 @@ class Outcar: are always present. Attributes: - magnetization (tuple): Magnetization on each ion as a tuple of dict, e.g. - ({"d": 0.0, "p": 0.003, "s": 0.002, "tot": 0.005}, ... ) - chemical_shielding (dict): Chemical shielding on each ion as a dictionary with core and valence contributions. - unsym_cs_tensor (list): Unsymmetrized chemical shielding tensor matrixes on each ion as a list. + magnetization (tuple[dict]): Magnetization on each ion, e.g. + ({"d": 0.0, "p": 0.003, "s": 0.002, "tot": 0.005}, ... ). + chemical_shielding (dict): Chemical shielding on each ion with core and valence contributions. + unsym_cs_tensor (list): Unsymmetrized chemical shielding tensor matrixes on each ion. e.g. [[[sigma11, sigma12, sigma13], [sigma21, sigma22, sigma23], [sigma31, sigma32, sigma33]], ...] - cs_g0_contribution (np.array): G=0 contribution to chemical shielding. 2D rank 3 matrix. - cs_core_contribution (dict): Core contribution to chemical shielding. dict. e.g. + cs_g0_contribution (NDArray): G=0 contribution to chemical shielding. 2D rank 3 matrix. + cs_core_contribution (dict): Core contribution to chemical shielding. e.g. {'Mg': -412.8, 'C': -200.5, 'O': -271.1} - efg (tuple): Electric Field Gradient (EFG) tensor on each ion as a tuple of dict, e.g. + efg (tuple[dict]): Electric Field Gradient (EFG) tensor on each ion, e.g. ({"cq": 0.1, "eta", 0.2, "nuclear_quadrupole_moment": 0.3}, {"cq": 0.7, "eta", 0.8, "nuclear_quadrupole_moment": 0.9}, ...) - charge (tuple): Charge on each ion as a tuple of dict, e.g. + charge (tuple[dict]): Charge on each ion, e.g. ({"p": 0.154, "s": 0.078, "d": 0.0, "tot": 0.232}, ...) is_stopped (bool): True if OUTCAR is from a stopped run (using STOPCAR, see VASP Manual). - run_stats (dict): Various useful run stats as a dict including "System time (sec)", "Total CPU time used (sec)", - "Elapsed time (sec)", "Maximum memory used (kb)", "Average memory used (kb)", "User time (sec)", "cores". - elastic_tensor (np.array): Total elastic moduli (Kbar) is given in a 6x6 array matrix. - drift (np.array): Total drift for each step in eV/Atom. + run_stats (dict[str, float | None]): Various useful run stats including "System time (sec)", + "Total CPU time used (sec)", "Elapsed time (sec)", "Maximum memory used (kb)", + "Average memory used (kb)", "User time (sec)", "cores". + elastic_tensor (NDArray): Total elastic moduli (Kbar) is given in a 6x6 array matrix. + drift (NDArray): Total drift for each step in eV/Atom. ngf (tuple): Dimensions for the Augmentation grid. - sampling_radii (np.array): Size of the sampling radii in VASP for the test charges for the electrostatic + sampling_radii (NDArray): Size of the sampling radii in VASP for the test charges for the electrostatic potential at each atom. Total array size is the number of elements present in the calculation. - electrostatic_potential (np.array): Average electrostatic potential at each atomic position in order of + electrostatic_potential (NDArray): Average electrostatic potential at each atomic position in order of the atoms in POSCAR. - final_energy_contribs (dict): Individual contributions to the total final energy as a dictionary. + final_energy_contribs (dict[str, float]): Individual contributions to the total final energy. Include contributions from keys, e.g.: {'DENC': -505778.5184347, 'EATOM': 15561.06492564, 'EBANDS': -804.53201231, 'EENTRO': -0.08932659, 'EXHF': 0.0, 'Ediel_sol': 0.0, 'PAW double counting': 664.6726974100002, 'PSCENC': 742.48691646, 'TEWEN': 489742.86847338, 'XCENC': -169.64189814} efermi (float): Fermi energy. - filename (str): Filename. + filename (PathLike): Filename. final_energy (float): Final energy after extrapolation of sigma back to 0, i.e. energy(sigma->0). final_energy_wo_entrp (float): Final energy before extrapolation of sigma, i.e. energy without entropy. final_fr_energy (float): Final "free energy", i.e. free energy TOTEN. has_onsite_density_matrices (bool): Whether onsite density matrices have been set. lcalcpol (bool): If LCALCPOL has been set. lepsilon (bool): If LEPSILON has been set. - nelect (float): Returns the number of electrons in the calculation. - spin (bool): If spin-polarization was enabled via ISPIN. + nelect (float): The number of electrons in the calculation. + spin (bool): If spin-polarization is enabled via ISPIN. total_mag (float): Total magnetization (in terms of the number of unpaired electrons). - One can then call a specific reader depending on the type of run being - performed. These are currently: read_igpar(), read_lepsilon() and - read_lcalcpol(), read_core_state_eign(), read_avg_core_pot(). - - See the documentation of those methods for more documentation. + One can then call a specific reader depending on the type of run being performed. + These are currently (see the documentation of those methods for more details): + - read_igpar + - read_lepsilon + - read_lcalcpol + - read_core_state_eign + - read_avg_core_pot Authors: Rickard Armiento, Shyue Ping Ong """ @@ -2287,7 +2290,7 @@ def read_pattern( arguments. Args: - patterns (dict): A dict of patterns, e.g. + patterns (dict[str, str]): A dict of patterns, e.g. {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"}. reverse (bool): Read files in reverse. Defaults to false. Useful for large files, esp OUTCARs, especially when used with @@ -2297,12 +2300,12 @@ def read_pattern( postprocess (Callable): A post processing function to convert all matches. Defaults to str, i.e., no change. - Renders accessible: + Renders accessible from self.data: Any attribute in patterns. For example, {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"} will set the value of self.data["energy"] = [[-1234], [-3453], ...], to the - results from regex and postprocess. Note that the returned values - are lists of lists, because you can grep multiple items on one line. + results from regex and postprocess. Note that the values + are list[list], because you can grep multiple items on one line. """ matches = regrep( self.filename, @@ -2323,7 +2326,7 @@ def read_table_pattern( attribute_name: str | None = None, last_one_only: bool = True, first_one_only: bool = False, - ) -> list: + ) -> list: # TODO: clarify table-like data type r"""Parse table-like data. A table composes of three parts: header, main body, footer. All the data matches "row pattern" in the main body will be returned. @@ -2392,7 +2395,14 @@ def read_table_pattern( return retained_data def read_electrostatic_potential(self) -> None: - """Parse the eletrostatic potential for the last ionic step.""" + """Parse the eletrostatic potential for the last ionic step. + + Renders accessible as attributes: TODO: + ngf: TODO: double check + sampling_radii: TODO: double check + radii: TODO: double check + electrostatic_potential: + """ pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"} self.read_pattern(pattern, postprocess=int) self.ngf = self.data.get("ngf", [[]])[0] @@ -2423,7 +2433,7 @@ def _parse_sci_notation(line: str) -> list[float]: line: line to parse. Returns: - list[float]: numbers if found, empty if not. + list[float]: numbers if found, empty list if not. """ if match := re.findall(r"[\.\-\d]+E[\+\-]\d{2}", line): return [float(t) for t in match] @@ -2434,6 +2444,10 @@ def read_freq_dielectric(self) -> None: Parse the frequency dependent dielectric function (obtained with LOPTICS). Frequencies (in eV) are in self.frequencies, and dielectric tensor function is given as self.dielectric_tensor_function. + + Renders accessible as attributes: TODO: + frequencies: + dielectric_tensor_function: """ plasma_pattern = r"plasma frequency squared.*" dielectric_pattern = ( @@ -2492,8 +2506,9 @@ def read_chemical_shielding(self) -> None: """Parse the NMR chemical shieldings data. Only the second part "absolute, valence and core" will be parsed. And only the three right most field (ISO_SHIELDING, SPAN, SKEW) will be retrieved. - Set self.data["chemical_shielding"] as: - List of chemical shieldings in the order of atoms from the OUTCAR. Maryland notation is adopted. + Renders accessible from self.data: + chemical_shielding (list): Chemical shieldings in the order of atoms + from the OUTCAR. Maryland notation is adopted. """ header_pattern = ( r"\s+CSA tensor \(J\. Mason, Solid State Nucl\. Magn\. Reson\. 2, " @@ -2524,8 +2539,8 @@ def read_chemical_shielding(self) -> None: def read_cs_g0_contribution(self) -> None: """Parse the G0 contribution of NMR chemical shielding. - Set self.data["cs_g0_contribution"] as: - list[list]: G0 contribution matrix. + Renders accessible from self.data: + cs_g0_contribution (list[list]): G0 contribution matrix. """ header_pattern = ( r"^\s+G\=0 CONTRIBUTION TO CHEMICAL SHIFT \(field along BDIR\)\s+$\n" @@ -2547,8 +2562,8 @@ def read_cs_g0_contribution(self) -> None: def read_cs_core_contribution(self) -> None: """Parse the core contribution of NMR chemical shielding. - Set self.data["cs_core_contribution"] as: - list[list]: G0 contribution matrix. + Renders accessible from self.data: + cs_core_contribution (list[list]): G0 contribution matrix. """ header_pattern = r"^\s+Core NMR properties\s*$\n\n^\s+typ\s+El\s+Core shift \(ppm\)\s*$\n^\s+-{20,}$\n" row_pattern = r"\d+\s+(?P[A-Z][a-z]?\w?)\s+(?P[-]?\d+\.\d+)" @@ -2567,8 +2582,8 @@ def read_cs_core_contribution(self) -> None: def read_cs_raw_symmetrized_tensors(self) -> None: """Parse the matrix form of NMR tensor before corrected to table. - Returns: - nsymmetrized tensors list in the order of atoms. + Renders accessible from self.data: TODO: + unsym_cs_tensor (list[list]): nsymmetrized tensors in the order of atoms. """ header_pattern = r"\s+-{50,}\s+\s+Absolute Chemical Shift tensors\s+\s+-{50,}$" first_part_pattern = r"\s+UNSYMMETRIZED TENSORS\s+$" @@ -2605,7 +2620,7 @@ def read_nmr_efg_tensor(self) -> list[NDArray]: """Parses the NMR Electric Field Gradient Raw Tensors. Returns: - A list of Electric Field Gradient Tensors in the order of Atoms from OUTCAR. + list[NDArray]: Electric Field Gradient Tensors in the order of atoms. """ header_pattern = ( r"Electric field gradients \(V/A\^2\)\n-*\n ion\s+V_xx\s+V_yy\s+V_zz\s+V_xy\s+V_xz\s+V_yz\n-*\n" @@ -2622,9 +2637,9 @@ def read_nmr_efg_tensor(self) -> list[NDArray]: def read_nmr_efg(self) -> None: """Parse the NMR Electric Field Gradient interpreted values. - Set self.data["efg"] as: - Electric Field Gradient tensors as a list of dict in the order of atoms from OUTCAR. - Each dict key/value pair corresponds to a component of the tensors. + Renders accessible from self.data: + efg (list[dict]): Electric Field Gradient tensors in the order of atoms. + Each dict key/value pair corresponds to a component of the tensors. """ header_pattern = ( r"^\s+NMR quadrupolar parameters\s+$\n" @@ -2652,8 +2667,8 @@ def read_elastic_tensor(self) -> None: """ Parse the elastic tensor data. - Set self.data["elastic_tensor"] as: - 6x6 array corresponding to the elastic tensor from the OUTCAR. + Renders accessible from self.data: + elastic_tensor: 6x6 array corresponding to the elastic tensor. """ header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s+Direction\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[X-Z][X-Z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) @@ -2662,7 +2677,11 @@ def read_elastic_tensor(self) -> None: self.data["elastic_tensor"] = et_table def read_piezo_tensor(self) -> None: - """Parse the piezo tensor data.""" + """Parse the piezo tensor data. + + Renders accessible from self.data: + piezo_tensor: TODO: fill value type. + """ header_pattern = r"PIEZOELECTRIC TENSOR for field in x, y, z\s+\(C/m\^2\)\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[x-z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"BORN EFFECTIVE" @@ -2672,8 +2691,8 @@ def read_piezo_tensor(self) -> None: def read_onsite_density_matrices(self) -> None: """Parse the onsite density matrices. - Set self.data["onsite_density_matrices"] as: - List with index corresponding to atom index in Structure. + Renders accessible from self.data: TODO: + onsite_density_matrices (list[dict]): List with index corresponding to atom index in Structure. """ # Matrix size will vary depending on if d or f orbitals are present. # Therefore regex assumes f, but filter out None values if d. @@ -2714,12 +2733,14 @@ def read_corrections( reverse: bool = True, terminate_on_match: bool = True, ) -> None: - """Read the dipol qudropol corrections into - self.data["dipol_quadrupol_correction"]. + """Read the dipol qudropol corrections. Args: reverse (bool): Whether to start from end of OUTCAR. Defaults to True. terminate_on_match (bool): Whether to terminate once match is found. Defaults to True. + + Renders accessible from self.data: + dipol_quadrupol_correction: TODO: fill details. """ patterns = {"dipol_quadrupol_correction": r"dipol\+quadrupol energy correction\s+([\d\-\.]+)"} self.read_pattern( @@ -2742,17 +2763,15 @@ def read_neb( Args: reverse (bool): Read files in reverse. Defaults to false. Useful for - large files, esp OUTCARs, especially when used with - terminate_on_match. Defaults to True here since we usually - want only the final value. + large files, especially when used with terminate_on_match. + Defaults to True here since we usually want only the final value. terminate_on_match (bool): Whether to terminate when there is at least one match in each key in pattern. Defaults to True here since we usually want only the final value. - Renders accessible: - tangent_force - Final tangent force. - energy - Final energy. - These can be accessed under Outcar.data[key] + Renders accessible from self.data: + tangent_force (float): Final tangent force. + energy (float): Final energy. """ patterns = { "energy": r"energy\(sigma->0\)\s+=\s+([\d\-\.]+)", @@ -2775,17 +2794,18 @@ def read_igpar(self) -> None: See VASP sections "LBERRY, IGPAR, NPPSTR, DIPOL" for info on what these are. - Renders accessible: - er_ev = e_ev (dictionary with Spin.up/Spin.down as keys) - er_bp = e_bp (dictionary with Spin.up/Spin.down as keys) - er_ev_tot = spin up + spin down summed - er_bp_tot = spin up + spin down summed - p_elc = spin up + spin down summed - p_ion = spin up + spin down summed. + Renders accessible as attributes: # TODO: double check type + er_ev (dict): e_ev (Spin.up/Spin.down as keys). + er_bp (dict): e_bp (Spin.up/Spin.down as keys). + er_ev_tot: spin up + spin down summed. + er_bp_tot: spin up + spin down summed. + p_elec (int): spin up + spin down summed. + p_ion (int): spin up + spin down summed. """ # Variables to be filled - self.er_ev = {} # dict (Spin.up/down) of array(3*float) - self.er_bp = {} # dict (Spin.up/down) of array(3*float) + # TODO: double check type + self.er_ev: dict = {} # (Spin.up/down) of array(3*float) + self.er_bp: dict = {} # (Spin.up/down) of array(3*float) self.er_ev_tot = None # array(3*float) self.er_bp_tot = None # array(3*float) self.p_elec: int | None = None @@ -2880,10 +2900,12 @@ def p_ion(results, match): except Exception as exc: raise RuntimeError("IGPAR OUTCAR could not be parsed.") from exc - def read_internal_strain_tensor(self): - """Read the internal strain tensor and populates - self.internal_strain_tensor with an array of voigt notation - tensors for each site. + def read_internal_strain_tensor(self) -> None: + """Read the internal strain tensor. + + Renders accessible as attributes: + # TODO: add type + internal_strain_tensor: an array of voigt notation tensors for each site. """ search = [] @@ -2929,7 +2951,8 @@ def internal_strain_data(results, match: str) -> None: def read_lepsilon(self) -> None: """Read a LEPSILON run. - TODO: Document the actual variables. + Renders accessible as attributes: + TODO: """ try: search = [] @@ -3085,7 +3108,8 @@ def born_section_stop(results, _match): def read_lepsilon_ionic(self) -> None: """Read the ionic component of a LEPSILON run. - TODO: Document the actual variables. + Renders accessible as attributes: + TODO: """ try: search = [] @@ -3211,7 +3235,8 @@ def piezo_section_stop(results, _match): def read_lcalcpol(self) -> None: """Read the LCALCPOL. - TODO: Document the actual variables. + Renders accessible as attributes: + TODO: """ self.p_elec = None self.p_sp1: int | None = None @@ -3314,7 +3339,11 @@ def p_ion(results, match): raise RuntimeError("LCALCPOL OUTCAR could not be parsed.") from exc def read_pseudo_zval(self) -> None: - """Create a pseudopotential ZVAL dictionary.""" + """Create a pseudopotential ZVAL dictionary. + + Renders accessible as attributes: + TODO: + """ try: def atom_symbols(results, match): @@ -3340,6 +3369,7 @@ def zvals(results, match): self.zval_dict = dict(zip(self.atom_symbols, self.zvals, strict=True)) # type: ignore[attr-defined] # Clean up + # TODO: is del necessary (need benchmark)? del self.atom_symbols # type: ignore[attr-defined] del self.zvals # type: ignore[attr-defined] @@ -3350,7 +3380,7 @@ def read_core_state_eigen(self) -> list[dict]: """Read the core state eigenenergies at each ionic step. Returns: - A list of dict over the atom such as [{"AO":[core state eig]}]. + list[dict]: The atom such as [{"AO":[core state eig]}]. The core state eigenenergie list for each AO is over all ionic step. @@ -3393,8 +3423,9 @@ def read_avg_core_poten(self) -> list[list]: """Read the core potential at each ionic step. Returns: - A list for each ionic step containing a list of the average core - potentials for each atom: [[avg core pot]]. + list[list]: A list for each ionic step containing a list of + the average core potentials for each atom: [[avg core pot]]. + TODO: what is "[avg core pot]", is it an array of 3 or "avg_core_pot"? Example: The average core potential of the 2nd atom of the structure at the @@ -3427,7 +3458,7 @@ def read_avg_core_poten(self) -> list[list]: return aps - def as_dict(self) -> dict: + def as_dict(self) -> dict[str, Any]: """MSONable dict.""" dct = { "@module": type(self).__module__, From ea1f2684c066925767cb76beee8005f8b5f45d40 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 17:54:07 +0800 Subject: [PATCH 02/16] relocate private parser method and as_dict --- src/pymatgen/io/vasp/outputs.py | 170 ++++++++++++++++---------------- 1 file changed, 86 insertions(+), 84 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 5cad19905d4..91f2a96a8a8 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -1970,6 +1970,7 @@ class Outcar: One can then call a specific reader depending on the type of run being performed. These are currently (see the documentation of those methods for more details): + # TODO: this seem pretty outdated? - read_igpar - read_lepsilon - read_lcalcpol @@ -2278,6 +2279,23 @@ def __init__(self, filename: PathLike) -> None: final_energy_contribs[key] = sum(map(float, self.data[key][-1])) self.final_energy_contribs = final_energy_contribs + @staticmethod + def _parse_sci_notation(line: str) -> list[float]: + """ + Parse lines with values in scientific notation and potentially + without spaces in between the values. This assumes that the scientific + notation always lists two digits for the exponent, e.g. 3.535E-02. + + Args: + line: line to parse. + + Returns: + list[float]: numbers if found, empty list if not. + """ + if match := re.findall(r"[\.\-\d]+E[\+\-]\d{2}", line): + return [float(t) for t in match] + return [] + def read_pattern( self, patterns: dict[str, str], @@ -2394,6 +2412,73 @@ def read_table_pattern( self.data[attribute_name] = retained_data return retained_data + def as_dict(self) -> dict[str, Any]: + """MSONable dict.""" + dct = { + "@module": type(self).__module__, + "@class": type(self).__name__, + "efermi": self.efermi, + "run_stats": self.run_stats, + "magnetization": self.magnetization, + "charge": self.charge, + "total_magnetization": self.total_mag, + "nelect": self.nelect, + "is_stopped": self.is_stopped, + "drift": self.drift, + "ngf": self.ngf, + "sampling_radii": self.sampling_radii, + "electrostatic_potential": self.electrostatic_potential, + } + + if self.lepsilon: + dct |= { + "piezo_tensor": self.piezo_tensor, + "dielectric_tensor": self.dielectric_tensor, + "born": self.born, + } + + if self.dfpt: + dct["internal_strain_tensor"] = self.internal_strain_tensor + + if self.dfpt and self.lepsilon: + dct |= { + "piezo_ionic_tensor": self.piezo_ionic_tensor, + "dielectric_ionic_tensor": self.dielectric_ionic_tensor, + } + + if self.lcalcpol: + dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} + if self.spin and not self.noncollinear: + dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} + dct["zval_dict"] = self.zval_dict + + if self.nmr_cs: + dct.update( + nmr_cs={ + "valence and core": self.data["chemical_shielding"]["valence_and_core"], + "valence_only": self.data["chemical_shielding"]["valence_only"], + "g0": self.data["cs_g0_contribution"], + "core": self.data["cs_core_contribution"], + "raw": self.data["unsym_cs_tensor"], + } + ) + + if self.nmr_efg: + dct.update( + nmr_efg={ + "raw": self.data["unsym_efg_tensor"], + "parameters": self.data["efg"], + } + ) + + if self.has_onsite_density_matrices: + # Cast Spin to str for consistency with electronic_structure + # TODO: improve handling of Enum (de)serialization in monty + onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in self.data["onsite_density_matrices"]] + dct["onsite_density_matrices"] = onsite_density_matrices + + return dct + def read_electrostatic_potential(self) -> None: """Parse the eletrostatic potential for the last ionic step. @@ -2422,23 +2507,6 @@ def read_electrostatic_potential(self) -> None: self.electrostatic_potential = [*map(float, pots)] - @staticmethod - def _parse_sci_notation(line: str) -> list[float]: - """ - Parse lines with values in scientific notation and potentially - without spaces in between the values. This assumes that the scientific - notation always lists two digits for the exponent, e.g. 3.535E-02. - - Args: - line: line to parse. - - Returns: - list[float]: numbers if found, empty list if not. - """ - if match := re.findall(r"[\.\-\d]+E[\+\-]\d{2}", line): - return [float(t) for t in match] - return [] - def read_freq_dielectric(self) -> None: """ Parse the frequency dependent dielectric function (obtained with @@ -3381,6 +3449,7 @@ def read_core_state_eigen(self) -> list[dict]: Returns: list[dict]: The atom such as [{"AO":[core state eig]}]. + # TODO: what is "[core state eig]"? array or "core_state_eig" likely the latter The core state eigenenergie list for each AO is over all ionic step. @@ -3458,73 +3527,6 @@ def read_avg_core_poten(self) -> list[list]: return aps - def as_dict(self) -> dict[str, Any]: - """MSONable dict.""" - dct = { - "@module": type(self).__module__, - "@class": type(self).__name__, - "efermi": self.efermi, - "run_stats": self.run_stats, - "magnetization": self.magnetization, - "charge": self.charge, - "total_magnetization": self.total_mag, - "nelect": self.nelect, - "is_stopped": self.is_stopped, - "drift": self.drift, - "ngf": self.ngf, - "sampling_radii": self.sampling_radii, - "electrostatic_potential": self.electrostatic_potential, - } - - if self.lepsilon: - dct |= { - "piezo_tensor": self.piezo_tensor, - "dielectric_tensor": self.dielectric_tensor, - "born": self.born, - } - - if self.dfpt: - dct["internal_strain_tensor"] = self.internal_strain_tensor - - if self.dfpt and self.lepsilon: - dct |= { - "piezo_ionic_tensor": self.piezo_ionic_tensor, - "dielectric_ionic_tensor": self.dielectric_ionic_tensor, - } - - if self.lcalcpol: - dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} - if self.spin and not self.noncollinear: - dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} - dct["zval_dict"] = self.zval_dict - - if self.nmr_cs: - dct.update( - nmr_cs={ - "valence and core": self.data["chemical_shielding"]["valence_and_core"], - "valence_only": self.data["chemical_shielding"]["valence_only"], - "g0": self.data["cs_g0_contribution"], - "core": self.data["cs_core_contribution"], - "raw": self.data["unsym_cs_tensor"], - } - ) - - if self.nmr_efg: - dct.update( - nmr_efg={ - "raw": self.data["unsym_efg_tensor"], - "parameters": self.data["efg"], - } - ) - - if self.has_onsite_density_matrices: - # Cast Spin to str for consistency with electronic_structure - # TODO: improve handling of Enum (de)serialization in monty - onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in self.data["onsite_density_matrices"]] - dct["onsite_density_matrices"] = onsite_density_matrices - - return dct - def read_fermi_contact_shift(self) -> None: """Read Fermi contact (isotropic) hyperfine coupling parameter. From 0184d9c65cbac6ba4369dfa58c51c20f289ca91e Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 18:03:09 +0800 Subject: [PATCH 03/16] tweak class docstring --- src/pymatgen/io/vasp/outputs.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 91f2a96a8a8..7baac23bb82 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -1917,14 +1917,22 @@ def as_dict(self) -> dict: class Outcar: - """Parser for data in OUTCAR that is not available in Vasprun.xml. + """Parser for data in OUTCAR that is not available in vasprun.xml. Note, this class works a bit differently than most of the other - VASP objects, since OUTCAR can be very different depending on which + VASP readers, since OUTCAR can be very different depending on which "type of run" performed. - Create the OUTCAR class with a filename reads "regular parameters" that - are always present. + Creating an Outcar instance with a filename reads "regular parameters" that + are always present. One can then call a specific reader method depending on the + type of run being performed, including (see the docstring of corresponding + method for more details): + # TODO: this seem pretty outdated? + - read_igpar + - read_lepsilon + - read_lcalcpol + - read_core_state_eign + - read_avg_core_pot Attributes: magnetization (tuple[dict]): Magnetization on each ion, e.g. @@ -1968,15 +1976,6 @@ class Outcar: spin (bool): If spin-polarization is enabled via ISPIN. total_mag (float): Total magnetization (in terms of the number of unpaired electrons). - One can then call a specific reader depending on the type of run being performed. - These are currently (see the documentation of those methods for more details): - # TODO: this seem pretty outdated? - - read_igpar - - read_lepsilon - - read_lcalcpol - - read_core_state_eign - - read_avg_core_pot - Authors: Rickard Armiento, Shyue Ping Ong """ @@ -2326,14 +2325,14 @@ def read_pattern( are list[list], because you can grep multiple items on one line. """ matches = regrep( - self.filename, - patterns, + filename=self.filename, + patterns=patterns, reverse=reverse, terminate_on_match=terminate_on_match, postprocess=postprocess, ) - for k in patterns: - self.data[k] = [i[0] for i in matches.get(k, [])] + for key in patterns: + self.data[key] = [i[0] for i in matches.get(key, [])] def read_table_pattern( self, From 6b8081995492328889f0f6b96da9f5e98d72afb4 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 18:16:17 +0800 Subject: [PATCH 04/16] NEED CONFIRM: deprecate read_pattern, rename to _parse_pattern --- src/pymatgen/io/vasp/outputs.py | 79 ++++++++++++++++++--------------- tests/io/vasp/test_outputs.py | 8 ++-- 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 7baac23bb82..696e555da82 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, cast import numpy as np +from monty.dev import deprecated from monty.io import reverse_readfile, zopen from monty.json import MSONable, jsanitize from monty.os.path import zpath @@ -2141,7 +2142,7 @@ def __init__(self, filename: PathLike) -> None: self.data: dict = {} # Read "total number of plane waves", NPLWV: - self.read_pattern( + self._parse_pattern( {"nplwv": r"total plane-waves NPLWV =\s+(\*{6}|\d+)"}, terminate_on_match=True, ) @@ -2152,7 +2153,7 @@ def __init__(self, filename: PathLike) -> None: nplwvs_at_kpoints = [ n - for [n] in self.read_table_pattern( + for [n] in self._parse_table_pattern( r"\n{3}-{104}\n{3}", r".+plane waves:\s+(\*{6,}|\d+)", ( @@ -2172,7 +2173,7 @@ def __init__(self, filename: PathLike) -> None: pass # Read the drift - self.read_pattern( + self._parse_pattern( {"drift": r"total drift:\s+([\.\-\d]+)\s+([\.\-\d]+)\s+([\.\-\d]+)"}, terminate_on_match=False, postprocess=float, @@ -2180,15 +2181,15 @@ def __init__(self, filename: PathLike) -> None: self.drift = self.data.get("drift", []) # Check if calculation is spin polarized - self.read_pattern({"spin": r"ISPIN\s*=\s*2"}) + self._parse_pattern({"spin": r"ISPIN\s*=\s*2"}) self.spin = bool(self.data.get("spin", [])) # Check if calculation is non-collinear - self.read_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"}) + self._parse_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"}) self.noncollinear = bool(self.data.get("noncollinear", [])) # Check if the calculation type is DFPT - self.read_pattern( + self._parse_pattern( {"ibrion": r"IBRION =\s+([\-\d]+)"}, terminate_on_match=True, postprocess=int, @@ -2200,7 +2201,7 @@ def __init__(self, filename: PathLike) -> None: self.dfpt = False # Check if LEPSILON is True and read piezo data if so - self.read_pattern({"epsilon": r"LEPSILON\s*=\s*T"}) + self._parse_pattern({"epsilon": r"LEPSILON\s*=\s*T"}) if self.data.get("epsilon", []): self.lepsilon = True self.read_lepsilon() @@ -2211,7 +2212,7 @@ def __init__(self, filename: PathLike) -> None: self.lepsilon = False # Check if LCALCPOL is True and read polarization data if so - self.read_pattern({"calcpol": r"LCALCPOL\s*=\s*T"}) + self._parse_pattern({"calcpol": r"LCALCPOL\s*=\s*T"}) if self.data.get("calcpol", []): self.lcalcpol = True self.read_lcalcpol() @@ -2223,11 +2224,11 @@ def __init__(self, filename: PathLike) -> None: self.electrostatic_potential: list[float] | None = None self.ngf = None self.sampling_radii: list[float] | None = None - self.read_pattern({"electrostatic": r"average \(electrostatic\) potential at core"}) + self._parse_pattern({"electrostatic": r"average \(electrostatic\) potential at core"}) if self.data.get("electrostatic", []): self.read_electrostatic_potential() - self.read_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"}) + self._parse_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"}) if self.data.get("nmr_cs"): self.nmr_cs = True self.read_chemical_shielding() @@ -2237,7 +2238,7 @@ def __init__(self, filename: PathLike) -> None: else: self.nmr_cs = False - self.read_pattern({"nmr_efg": r"NMR quadrupolar parameters"}) + self._parse_pattern({"nmr_efg": r"NMR quadrupolar parameters"}) if self.data.get("nmr_efg"): self.nmr_efg = True self.read_nmr_efg() @@ -2245,7 +2246,7 @@ def __init__(self, filename: PathLike) -> None: else: self.nmr_efg = False - self.read_pattern( + self._parse_pattern( {"has_onsite_density_matrices": r"onsite density matrix"}, terminate_on_match=True, ) @@ -2270,9 +2271,9 @@ def __init__(self, filename: PathLike) -> None: "Ediel_sol", ): if key == "PAW double counting": - self.read_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"}) + self._parse_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"}) else: - self.read_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"}) + self._parse_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"}) if not self.data[key]: continue final_energy_contribs[key] = sum(map(float, self.data[key][-1])) @@ -2295,7 +2296,7 @@ def _parse_sci_notation(line: str) -> list[float]: return [float(t) for t in match] return [] - def read_pattern( + def _parse_pattern( self, patterns: dict[str, str], reverse: bool = False, @@ -2303,7 +2304,7 @@ def read_pattern( postprocess: Callable = str, ) -> None: r""" - General pattern reading. Use monty's regrep method and take the same + General pattern parser. Use monty's regrep method and take the same arguments. Args: @@ -2334,7 +2335,7 @@ def read_pattern( for key in patterns: self.data[key] = [i[0] for i in matches.get(key, [])] - def read_table_pattern( + def _parse_table_pattern( self, header_pattern: str, row_pattern: str, @@ -2411,6 +2412,14 @@ def read_table_pattern( self.data[attribute_name] = retained_data return retained_data + @deprecated(_parse_pattern) + def read_pattern(self, *args, **kwargs): + self._parse_pattern(*args, **kwargs) + + @deprecated(_parse_table_pattern) + def read_table_pattern(self, *args, **kwargs): + return self._parse_table_pattern(*args, **kwargs) + def as_dict(self) -> dict[str, Any]: """MSONable dict.""" dct = { @@ -2488,18 +2497,18 @@ def read_electrostatic_potential(self) -> None: electrostatic_potential: """ pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"} - self.read_pattern(pattern, postprocess=int) + self._parse_pattern(pattern, postprocess=int) self.ngf = self.data.get("ngf", [[]])[0] pattern = {"radii": r"the test charge radii are((?:\s+[\.\-\d]+)+)"} - self.read_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str) + self._parse_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str) self.sampling_radii = [*map(float, self.data["radii"][0][0].split())] header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)" table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pots: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern) + pots: list = self._parse_table_pattern(header_pattern, table_pattern, footer_pattern) _pots: str = "".join(itertools.chain.from_iterable(pots)) pots = re.findall(r"\s+\d+\s*([\.\-\d]+)+", _pots) @@ -2591,11 +2600,11 @@ def read_chemical_shielding(self) -> None: row_pattern = r"\d+(?:\s+[-]?\d+\.\d+){3}\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3) footer_pattern = r"-{50,}\s*$" h1 = header_pattern + first_part_pattern - cs_valence_only = self.read_table_pattern( + cs_valence_only = self._parse_table_pattern( h1, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) h2 = header_pattern + swallon_valence_body_pattern - cs_valence_and_core = self.read_table_pattern( + cs_valence_and_core = self._parse_table_pattern( h2, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) self.data["chemical_shielding"] = { @@ -2617,7 +2626,7 @@ def read_cs_g0_contribution(self) -> None: ) row_pattern = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3) footer_pattern = r"\s+-{50,}\s*$" - self.read_table_pattern( + self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2635,7 +2644,7 @@ def read_cs_core_contribution(self) -> None: header_pattern = r"^\s+Core NMR properties\s*$\n\n^\s+typ\s+El\s+Core shift \(ppm\)\s*$\n^\s+-{20,}$\n" row_pattern = r"\d+\s+(?P[A-Z][a-z]?\w?)\s+(?P[-]?\d+\.\d+)" footer_pattern = r"\s+-{20,}\s*$" - self.read_table_pattern( + self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2696,7 +2705,7 @@ def read_nmr_efg_tensor(self) -> list[NDArray]: row_pattern = r"\d+\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)" footer_pattern = r"-*\n" - data = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + data = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) tensors = [make_symmetric_matrix_from_upper_tri(d) for d in data] self.data["unsym_efg_tensor"] = tensors return tensors @@ -2721,7 +2730,7 @@ def read_nmr_efg(self) -> None: r"\d+\s+(?P[-]?\d+\.\d+)\s+(?P[-]?\d+\.\d+)\s+(?P[-]?\d+\.\d+)" ) footer_pattern = r"-{50,}\s*$" - self.read_table_pattern( + self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2740,7 +2749,7 @@ def read_elastic_tensor(self) -> None: header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s+Direction\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[X-Z][X-Z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"\-+" - et_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + et_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) self.data["elastic_tensor"] = et_table def read_piezo_tensor(self) -> None: @@ -2752,7 +2761,7 @@ def read_piezo_tensor(self) -> None: header_pattern = r"PIEZOELECTRIC TENSOR for field in x, y, z\s+\(C/m\^2\)\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[x-z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"BORN EFFECTIVE" - pt_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + pt_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) self.data["piezo_tensor"] = pt_table def read_onsite_density_matrices(self) -> None: @@ -2766,7 +2775,7 @@ def read_onsite_density_matrices(self) -> None: header_pattern = r"spin component 1\n" row_pattern = r"[^\S\r\n]*(?:(-?[\d.]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?" footer_pattern = r"\nspin component 2" - spin1_component = self.read_table_pattern( + spin1_component = self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2781,7 +2790,7 @@ def read_onsite_density_matrices(self) -> None: header_pattern = r"spin component 2\n" row_pattern = r"[^\S\r\n]*(?:([\d.-]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?" footer_pattern = r"\n occupancies and eigenvectors" - spin2_component = self.read_table_pattern( + spin2_component = self._parse_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2810,7 +2819,7 @@ def read_corrections( dipol_quadrupol_correction: TODO: fill details. """ patterns = {"dipol_quadrupol_correction": r"dipol\+quadrupol energy correction\s+([\d\-\.]+)"} - self.read_pattern( + self._parse_pattern( patterns, reverse=reverse, terminate_on_match=terminate_on_match, @@ -2845,7 +2854,7 @@ def read_neb( "tangent_force": r"(NEB: projections on to tangent \(spring, REAL\)\s+\S+|tangential force \(eV/A\))\s+" r"([\d\-\.]+)", } - self.read_pattern( + self._parse_pattern( patterns, reverse=reverse, terminate_on_match=terminate_on_match, @@ -3552,7 +3561,7 @@ def read_fermi_contact_shift(self) -> None: ) row_pattern1 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 5) footer_pattern = r"\-+" - fch_table = self.read_table_pattern( + fch_table = self._parse_table_pattern( header_pattern1, row_pattern1, footer_pattern, @@ -3568,7 +3577,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern2 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 6) - dh_table = self.read_table_pattern( + dh_table = self._parse_table_pattern( header_pattern2, row_pattern2, footer_pattern, @@ -3585,7 +3594,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern3 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 4) - th_table = self.read_table_pattern( + th_table = self._parse_table_pattern( header_pattern3, row_pattern3, footer_pattern, diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index fe8b5ad83dc..7ddf2aa56b7 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -1382,21 +1382,21 @@ def test_energies(self): assert outcar.final_energy_wo_entrp == approx(-15.83863167) assert outcar.final_fr_energy == approx(-15.92115453) - def test_read_table_pattern(self): + def test_parse_table_pattern(self): outcar = Outcar(f"{VASP_OUT_DIR}/OUTCAR.gz") header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)" table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pots = outcar.read_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True) + pots = outcar._parse_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True) ref_last = [ [" 1 -26.0704 2 -45.5046 3 -45.5046 4 -72.9539 5 -73.0621"], [" 6 -72.9539 7 -73.0621"], ] assert pots == ref_last - pots = outcar.read_table_pattern( + pots = outcar._parse_table_pattern( header_pattern, table_pattern, footer_pattern, @@ -1413,7 +1413,7 @@ def test_read_table_pattern(self): ValueError, match="last_one_only and first_one_only options are incompatible", ): - outcar.read_table_pattern( + outcar._parse_table_pattern( header_pattern, table_pattern, footer_pattern, From b87517e4c40fb761379a132af6a3dd6905126d44 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 18:41:56 +0800 Subject: [PATCH 05/16] update available readers --- src/pymatgen/io/vasp/outputs.py | 42 +++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 696e555da82..2ddef62e595 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -1921,19 +1921,36 @@ class Outcar: """Parser for data in OUTCAR that is not available in vasprun.xml. Note, this class works a bit differently than most of the other - VASP readers, since OUTCAR can be very different depending on which + VASP parsers, since OUTCAR can be very different depending on which "type of run" performed. Creating an Outcar instance with a filename reads "regular parameters" that are always present. One can then call a specific reader method depending on the type of run being performed, including (see the docstring of corresponding method for more details): - # TODO: this seem pretty outdated? + - read_avg_core_poten + - read_chemical_shielding + - read_core_state_eigen + - read_corrections + - read_cs_core_contribution + - read_cs_g0_contribution + - read_cs_raw_symmetrized_tensors + - read_elastic_tensor + - read_electrostatic_potential + - read_fermi_contact_shift + - read_freq_dielectric - read_igpar - - read_lepsilon + - read_internal_strain_tensor - read_lcalcpol - - read_core_state_eign - - read_avg_core_pot + - read_lepsilon + - read_lepsilon_ionic + - read_neb + - read_nmr_efg + - read_nmr_efg_tensor + - read_onsite_density_matrices + - read_piezo_tensor + - read_pseudo_zval + - read_table_pattern Attributes: magnetization (tuple[dict]): Magnetization on each ion, e.g. @@ -2141,7 +2158,7 @@ def __init__(self, filename: PathLike) -> None: self.final_fr_energy = e_fr_energy self.data: dict = {} - # Read "total number of plane waves", NPLWV: + # Read NPLWV (total number of plane waves) self._parse_pattern( {"nplwv": r"total plane-waves NPLWV =\s+(\*{6}|\d+)"}, terminate_on_match=True, @@ -2490,11 +2507,11 @@ def as_dict(self) -> dict[str, Any]: def read_electrostatic_potential(self) -> None: """Parse the eletrostatic potential for the last ionic step. - Renders accessible as attributes: TODO: + Renders accessible as attributes: ngf: TODO: double check - sampling_radii: TODO: double check radii: TODO: double check - electrostatic_potential: + sampling_radii: TODO: double check + electrostatic_potential (list[float]): The eletrostatic potential. """ pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"} self._parse_pattern(pattern, postprocess=int) @@ -2508,10 +2525,9 @@ def read_electrostatic_potential(self) -> None: table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pots: list = self._parse_table_pattern(header_pattern, table_pattern, footer_pattern) - _pots: str = "".join(itertools.chain.from_iterable(pots)) - - pots = re.findall(r"\s+\d+\s*([\.\-\d]+)+", _pots) + pot_patterns: list = self._parse_table_pattern(header_pattern, table_pattern, footer_pattern) + pot_patterns_str: str = "".join(itertools.chain.from_iterable(pot_patterns)) + pots: list = re.findall(r"\s+\d+\s*([\.\-\d]+)+", pot_patterns_str) self.electrostatic_potential = [*map(float, pots)] From 6b184374d48bfcad0c57526fa047eb67c155c86a Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:07:29 +0800 Subject: [PATCH 06/16] micro_pyawk doc clean up --- src/pymatgen/util/io_utils.py | 49 +++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index f8c7d268f43..d6550f4fb29 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -9,7 +9,10 @@ from monty.io import zopen if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable, Iterator + from typing import Any + + from pymatgen.util.typing import PathLike __author__ = "Shyue Ping Ong, Rickard Armiento, Anubhav Jain, G Matteo, Ioannis Petousis" __copyright__ = "Copyright 2011, The Materials Project" @@ -48,36 +51,41 @@ def clean_lines( yield clean_string -def micro_pyawk(filename, search, results=None, debug=None, postdebug=None): +def micro_pyawk( + filename: PathLike, + search: list[tuple[re.Pattern | str, Callable, Callable]], + results: Any | None = None, + debug: Callable | None = None, + postdebug: Callable | None = None, +) -> Any: """Small awk-mimicking search routine. - 'file' is file to search through. - 'search' is the "search program", a list of lists/tuples with 3 elements; - i.e. [[regex, test, run], [regex, test, run], ...] - 'results' is a an object that your search program will have access to for - storing results. - - Here regex is either as a Regex object, or a string that we compile into a - Regex. test and run are callable objects. + This function goes through each line in the file, and if regex matches that + line AND test(results, line) is True (OR test is None) we execute + run(results, match), where match is the Match object from running + Pattern.match. - This function goes through each line in filename, and if regex matches that - line *and* test(results,line)==True (or test is None) we execute - run(results,match), where match is the match object from running - Regex.match. + TODO: deprecate and remove debug/postdebug? - The default results is an empty dictionary. Passing a results object let - you interact with it in run() and test(). Hence, in many occasions it is - thus clever to use results=self. - - Author: Rickard Armiento, Ioannis Petousis + Args: + filename (PathLike): The file to search through. + search (list[tuple[Pattern | str, Callable, Callable]]): The "search program" of + 3 elements, i.e. [(regex, test, run), ...]. + Here "regex" is either a Pattern object, or a string that we compile + into a Pattern. + results: An object to store results. Default as an empty dictionary. + Passing a results object let you interact with it via "run" and "test". + Hence, in many occasions it is clever to use the instance itself as results. Returns: dict[str, Any]: The results dictionary. + + Author: Rickard Armiento, Ioannis Petousis """ if results is None: results = {} - # Compile regex strings + # Compile regex strings to Pattern for entry in search: entry[0] = re.compile(entry[0]) @@ -95,5 +103,6 @@ def micro_pyawk(filename, search, results=None, debug=None, postdebug=None): return results +# TODO: this seem to do nothing, remove it? umask = os.umask(0) os.umask(umask) From f3dc1e1ee1532589f721b763b37ebec3f4af88e5 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:13:31 +0800 Subject: [PATCH 07/16] schedule debug arg for removal --- src/pymatgen/util/io_utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index d6550f4fb29..478dc246952 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -4,6 +4,7 @@ import os import re +import warnings from typing import TYPE_CHECKING from monty.io import zopen @@ -65,8 +66,6 @@ def micro_pyawk( run(results, match), where match is the Match object from running Pattern.match. - TODO: deprecate and remove debug/postdebug? - Args: filename (PathLike): The file to search through. search (list[tuple[Pattern | str, Callable, Callable]]): The "search program" of @@ -76,12 +75,20 @@ def micro_pyawk( results: An object to store results. Default as an empty dictionary. Passing a results object let you interact with it via "run" and "test". Hence, in many occasions it is clever to use the instance itself as results. + debug (Callable): Debug "run". + postdebug (Callable): Another "run" after debug "run". Returns: dict[str, Any]: The results dictionary. Author: Rickard Armiento, Ioannis Petousis """ + # TODO: remove debug and postdebug after 2025-11-09 if no one is opposing + if debug is not None: + warnings.warn("arg debug is scheduled for removal, see PR4160", DeprecationWarning, stacklevel=2) + if postdebug is not None: + warnings.warn("arg postdebug is scheduled for removal, see PR4160", DeprecationWarning, stacklevel=2) + if results is None: results = {} From 5164beb1e8e4b788c7a5d1760264ffb018b66150 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:21:49 +0800 Subject: [PATCH 08/16] remove umask that is doing nothing --- src/pymatgen/util/io_utils.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index 478dc246952..9b1a9431440 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os import re import warnings from typing import TYPE_CHECKING @@ -108,8 +107,3 @@ def micro_pyawk( postdebug(results, match) return results - - -# TODO: this seem to do nothing, remove it? -umask = os.umask(0) -os.umask(umask) From de4af27bef2172354ed1b626799f022a757a8016 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:32:31 +0800 Subject: [PATCH 09/16] the docstring must be incorrect, tuple is not mutable --- src/pymatgen/util/io_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index 9b1a9431440..9396edaa556 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -92,8 +92,7 @@ def micro_pyawk( results = {} # Compile regex strings to Pattern - for entry in search: - entry[0] = re.compile(entry[0]) + search = [(re.compile(pattern), test, run) for pattern, test, run in search] with zopen(filename, mode="rt") as file: for line in file: From 6bd1c32c79a28d732d95ab1916e95124879ea71d Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:35:12 +0800 Subject: [PATCH 10/16] unpack tuple for readability --- src/pymatgen/util/io_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index 9396edaa556..b903c177de0 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -92,16 +92,18 @@ def micro_pyawk( results = {} # Compile regex strings to Pattern - search = [(re.compile(pattern), test, run) for pattern, test, run in search] + search = [(re.compile(regex), test, run) for regex, test, run in search] with zopen(filename, mode="rt") as file: for line in file: - for entry in search: - match = re.search(entry[0], line) - if match and (entry[1] is None or entry[1](results, line)): + for regex, test, run in search: + match = re.search(regex, line) + + if match and (test is None or test(results, line)): if debug is not None: debug(results, match) - entry[2](results, match) + + run(results, match) if postdebug is not None: postdebug(results, match) From 1a18fa5f5e668aa654f2beca8155ead94a604ca6 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:38:03 +0800 Subject: [PATCH 11/16] fix return type --- src/pymatgen/util/io_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index b903c177de0..35d531c63c3 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -78,7 +78,7 @@ def micro_pyawk( postdebug (Callable): Another "run" after debug "run". Returns: - dict[str, Any]: The results dictionary. + Any: The results object. Author: Rickard Armiento, Ioannis Petousis """ From 32a554a7e8d1612d0b158d23659872d801c0929b Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:39:42 +0800 Subject: [PATCH 12/16] use is to check None --- src/pymatgen/util/io_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index 35d531c63c3..f386e438508 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -99,7 +99,7 @@ def micro_pyawk( for regex, test, run in search: match = re.search(regex, line) - if match and (test is None or test(results, line)): + if match is not None and (test is None or test(results, line)): if debug is not None: debug(results, match) From e058c33155c3667cde0195ca805f3fc5b8aecb2c Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:51:19 +0800 Subject: [PATCH 13/16] Revert "NEED CONFIRM: deprecate read_pattern, rename to _parse_pattern" This reverts commit 6b8081995492328889f0f6b96da9f5e98d72afb4. --- src/pymatgen/io/vasp/outputs.py | 79 +++++++++++++++------------------ tests/io/vasp/test_outputs.py | 8 ++-- 2 files changed, 39 insertions(+), 48 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 2ddef62e595..1d84f63a95d 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, cast import numpy as np -from monty.dev import deprecated from monty.io import reverse_readfile, zopen from monty.json import MSONable, jsanitize from monty.os.path import zpath @@ -2159,7 +2158,7 @@ def __init__(self, filename: PathLike) -> None: self.data: dict = {} # Read NPLWV (total number of plane waves) - self._parse_pattern( + self.read_pattern( {"nplwv": r"total plane-waves NPLWV =\s+(\*{6}|\d+)"}, terminate_on_match=True, ) @@ -2170,7 +2169,7 @@ def __init__(self, filename: PathLike) -> None: nplwvs_at_kpoints = [ n - for [n] in self._parse_table_pattern( + for [n] in self.read_table_pattern( r"\n{3}-{104}\n{3}", r".+plane waves:\s+(\*{6,}|\d+)", ( @@ -2190,7 +2189,7 @@ def __init__(self, filename: PathLike) -> None: pass # Read the drift - self._parse_pattern( + self.read_pattern( {"drift": r"total drift:\s+([\.\-\d]+)\s+([\.\-\d]+)\s+([\.\-\d]+)"}, terminate_on_match=False, postprocess=float, @@ -2198,15 +2197,15 @@ def __init__(self, filename: PathLike) -> None: self.drift = self.data.get("drift", []) # Check if calculation is spin polarized - self._parse_pattern({"spin": r"ISPIN\s*=\s*2"}) + self.read_pattern({"spin": r"ISPIN\s*=\s*2"}) self.spin = bool(self.data.get("spin", [])) # Check if calculation is non-collinear - self._parse_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"}) + self.read_pattern({"noncollinear": r"LNONCOLLINEAR\s*=\s*T"}) self.noncollinear = bool(self.data.get("noncollinear", [])) # Check if the calculation type is DFPT - self._parse_pattern( + self.read_pattern( {"ibrion": r"IBRION =\s+([\-\d]+)"}, terminate_on_match=True, postprocess=int, @@ -2218,7 +2217,7 @@ def __init__(self, filename: PathLike) -> None: self.dfpt = False # Check if LEPSILON is True and read piezo data if so - self._parse_pattern({"epsilon": r"LEPSILON\s*=\s*T"}) + self.read_pattern({"epsilon": r"LEPSILON\s*=\s*T"}) if self.data.get("epsilon", []): self.lepsilon = True self.read_lepsilon() @@ -2229,7 +2228,7 @@ def __init__(self, filename: PathLike) -> None: self.lepsilon = False # Check if LCALCPOL is True and read polarization data if so - self._parse_pattern({"calcpol": r"LCALCPOL\s*=\s*T"}) + self.read_pattern({"calcpol": r"LCALCPOL\s*=\s*T"}) if self.data.get("calcpol", []): self.lcalcpol = True self.read_lcalcpol() @@ -2241,11 +2240,11 @@ def __init__(self, filename: PathLike) -> None: self.electrostatic_potential: list[float] | None = None self.ngf = None self.sampling_radii: list[float] | None = None - self._parse_pattern({"electrostatic": r"average \(electrostatic\) potential at core"}) + self.read_pattern({"electrostatic": r"average \(electrostatic\) potential at core"}) if self.data.get("electrostatic", []): self.read_electrostatic_potential() - self._parse_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"}) + self.read_pattern({"nmr_cs": r"LCHIMAG\s*=\s*(T)"}) if self.data.get("nmr_cs"): self.nmr_cs = True self.read_chemical_shielding() @@ -2255,7 +2254,7 @@ def __init__(self, filename: PathLike) -> None: else: self.nmr_cs = False - self._parse_pattern({"nmr_efg": r"NMR quadrupolar parameters"}) + self.read_pattern({"nmr_efg": r"NMR quadrupolar parameters"}) if self.data.get("nmr_efg"): self.nmr_efg = True self.read_nmr_efg() @@ -2263,7 +2262,7 @@ def __init__(self, filename: PathLike) -> None: else: self.nmr_efg = False - self._parse_pattern( + self.read_pattern( {"has_onsite_density_matrices": r"onsite density matrix"}, terminate_on_match=True, ) @@ -2288,9 +2287,9 @@ def __init__(self, filename: PathLike) -> None: "Ediel_sol", ): if key == "PAW double counting": - self._parse_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"}) + self.read_pattern({key: rf"{key}\s+=\s+([\.\-\d]+)\s+([\.\-\d]+)"}) else: - self._parse_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"}) + self.read_pattern({key: rf"{key}\s+=\s+([\d\-\.]+)"}) if not self.data[key]: continue final_energy_contribs[key] = sum(map(float, self.data[key][-1])) @@ -2313,7 +2312,7 @@ def _parse_sci_notation(line: str) -> list[float]: return [float(t) for t in match] return [] - def _parse_pattern( + def read_pattern( self, patterns: dict[str, str], reverse: bool = False, @@ -2321,7 +2320,7 @@ def _parse_pattern( postprocess: Callable = str, ) -> None: r""" - General pattern parser. Use monty's regrep method and take the same + General pattern reading. Use monty's regrep method and take the same arguments. Args: @@ -2352,7 +2351,7 @@ def _parse_pattern( for key in patterns: self.data[key] = [i[0] for i in matches.get(key, [])] - def _parse_table_pattern( + def read_table_pattern( self, header_pattern: str, row_pattern: str, @@ -2429,14 +2428,6 @@ def _parse_table_pattern( self.data[attribute_name] = retained_data return retained_data - @deprecated(_parse_pattern) - def read_pattern(self, *args, **kwargs): - self._parse_pattern(*args, **kwargs) - - @deprecated(_parse_table_pattern) - def read_table_pattern(self, *args, **kwargs): - return self._parse_table_pattern(*args, **kwargs) - def as_dict(self) -> dict[str, Any]: """MSONable dict.""" dct = { @@ -2514,18 +2505,18 @@ def read_electrostatic_potential(self) -> None: electrostatic_potential (list[float]): The eletrostatic potential. """ pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"} - self._parse_pattern(pattern, postprocess=int) + self.read_pattern(pattern, postprocess=int) self.ngf = self.data.get("ngf", [[]])[0] pattern = {"radii": r"the test charge radii are((?:\s+[\.\-\d]+)+)"} - self._parse_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str) + self.read_pattern(pattern, reverse=True, terminate_on_match=True, postprocess=str) self.sampling_radii = [*map(float, self.data["radii"][0][0].split())] header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)" table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pot_patterns: list = self._parse_table_pattern(header_pattern, table_pattern, footer_pattern) + pot_patterns: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern) pot_patterns_str: str = "".join(itertools.chain.from_iterable(pot_patterns)) pots: list = re.findall(r"\s+\d+\s*([\.\-\d]+)+", pot_patterns_str) @@ -2616,11 +2607,11 @@ def read_chemical_shielding(self) -> None: row_pattern = r"\d+(?:\s+[-]?\d+\.\d+){3}\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3) footer_pattern = r"-{50,}\s*$" h1 = header_pattern + first_part_pattern - cs_valence_only = self._parse_table_pattern( + cs_valence_only = self.read_table_pattern( h1, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) h2 = header_pattern + swallon_valence_body_pattern - cs_valence_and_core = self._parse_table_pattern( + cs_valence_and_core = self.read_table_pattern( h2, row_pattern, footer_pattern, postprocess=float, last_one_only=True ) self.data["chemical_shielding"] = { @@ -2642,7 +2633,7 @@ def read_cs_g0_contribution(self) -> None: ) row_pattern = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 3) footer_pattern = r"\s+-{50,}\s*$" - self._parse_table_pattern( + self.read_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2660,7 +2651,7 @@ def read_cs_core_contribution(self) -> None: header_pattern = r"^\s+Core NMR properties\s*$\n\n^\s+typ\s+El\s+Core shift \(ppm\)\s*$\n^\s+-{20,}$\n" row_pattern = r"\d+\s+(?P[A-Z][a-z]?\w?)\s+(?P[-]?\d+\.\d+)" footer_pattern = r"\s+-{20,}\s*$" - self._parse_table_pattern( + self.read_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2721,7 +2712,7 @@ def read_nmr_efg_tensor(self) -> list[NDArray]: row_pattern = r"\d+\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)" footer_pattern = r"-*\n" - data = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + data = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) tensors = [make_symmetric_matrix_from_upper_tri(d) for d in data] self.data["unsym_efg_tensor"] = tensors return tensors @@ -2746,7 +2737,7 @@ def read_nmr_efg(self) -> None: r"\d+\s+(?P[-]?\d+\.\d+)\s+(?P[-]?\d+\.\d+)\s+(?P[-]?\d+\.\d+)" ) footer_pattern = r"-{50,}\s*$" - self._parse_table_pattern( + self.read_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2765,7 +2756,7 @@ def read_elastic_tensor(self) -> None: header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s+Direction\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[X-Z][X-Z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"\-+" - et_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + et_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) self.data["elastic_tensor"] = et_table def read_piezo_tensor(self) -> None: @@ -2777,7 +2768,7 @@ def read_piezo_tensor(self) -> None: header_pattern = r"PIEZOELECTRIC TENSOR for field in x, y, z\s+\(C/m\^2\)\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[x-z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"BORN EFFECTIVE" - pt_table = self._parse_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) + pt_table = self.read_table_pattern(header_pattern, row_pattern, footer_pattern, postprocess=float) self.data["piezo_tensor"] = pt_table def read_onsite_density_matrices(self) -> None: @@ -2791,7 +2782,7 @@ def read_onsite_density_matrices(self) -> None: header_pattern = r"spin component 1\n" row_pattern = r"[^\S\r\n]*(?:(-?[\d.]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?" footer_pattern = r"\nspin component 2" - spin1_component = self._parse_table_pattern( + spin1_component = self.read_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2806,7 +2797,7 @@ def read_onsite_density_matrices(self) -> None: header_pattern = r"spin component 2\n" row_pattern = r"[^\S\r\n]*(?:([\d.-]+))" + r"(?:[^\S\r\n]*(-?[\d.]+)[^\S\r\n]*)?" * 6 + r".*?" footer_pattern = r"\n occupancies and eigenvectors" - spin2_component = self._parse_table_pattern( + spin2_component = self.read_table_pattern( header_pattern, row_pattern, footer_pattern, @@ -2835,7 +2826,7 @@ def read_corrections( dipol_quadrupol_correction: TODO: fill details. """ patterns = {"dipol_quadrupol_correction": r"dipol\+quadrupol energy correction\s+([\d\-\.]+)"} - self._parse_pattern( + self.read_pattern( patterns, reverse=reverse, terminate_on_match=terminate_on_match, @@ -2870,7 +2861,7 @@ def read_neb( "tangent_force": r"(NEB: projections on to tangent \(spring, REAL\)\s+\S+|tangential force \(eV/A\))\s+" r"([\d\-\.]+)", } - self._parse_pattern( + self.read_pattern( patterns, reverse=reverse, terminate_on_match=terminate_on_match, @@ -3577,7 +3568,7 @@ def read_fermi_contact_shift(self) -> None: ) row_pattern1 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 5) footer_pattern = r"\-+" - fch_table = self._parse_table_pattern( + fch_table = self.read_table_pattern( header_pattern1, row_pattern1, footer_pattern, @@ -3593,7 +3584,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern2 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 6) - dh_table = self._parse_table_pattern( + dh_table = self.read_table_pattern( header_pattern2, row_pattern2, footer_pattern, @@ -3610,7 +3601,7 @@ def read_fermi_contact_shift(self) -> None: r"\s*\-+" ) row_pattern3 = r"(?:\d+)\s+" + r"\s+".join([r"([-]?\d+\.\d+)"] * 4) - th_table = self._parse_table_pattern( + th_table = self.read_table_pattern( header_pattern3, row_pattern3, footer_pattern, diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py index 7ddf2aa56b7..fe8b5ad83dc 100644 --- a/tests/io/vasp/test_outputs.py +++ b/tests/io/vasp/test_outputs.py @@ -1382,21 +1382,21 @@ def test_energies(self): assert outcar.final_energy_wo_entrp == approx(-15.83863167) assert outcar.final_fr_energy == approx(-15.92115453) - def test_parse_table_pattern(self): + def test_read_table_pattern(self): outcar = Outcar(f"{VASP_OUT_DIR}/OUTCAR.gz") header_pattern = r"\(the norm of the test charge is\s+[\.\-\d]+\)" table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pots = outcar._parse_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True) + pots = outcar.read_table_pattern(header_pattern, table_pattern, footer_pattern, last_one_only=True) ref_last = [ [" 1 -26.0704 2 -45.5046 3 -45.5046 4 -72.9539 5 -73.0621"], [" 6 -72.9539 7 -73.0621"], ] assert pots == ref_last - pots = outcar._parse_table_pattern( + pots = outcar.read_table_pattern( header_pattern, table_pattern, footer_pattern, @@ -1413,7 +1413,7 @@ def test_parse_table_pattern(self): ValueError, match="last_one_only and first_one_only options are incompatible", ): - outcar._parse_table_pattern( + outcar.read_table_pattern( header_pattern, table_pattern, footer_pattern, From c7fab4b323038320c5049a95ded6f833a8c1ff5b Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 9 Nov 2024 19:54:02 +0800 Subject: [PATCH 14/16] relocate read_pattern --- src/pymatgen/io/vasp/outputs.py | 134 ++++++++++++++++---------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 1d84f63a95d..955e400649e 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -2312,6 +2312,73 @@ def _parse_sci_notation(line: str) -> list[float]: return [float(t) for t in match] return [] + def as_dict(self) -> dict[str, Any]: + """MSONable dict.""" + dct = { + "@module": type(self).__module__, + "@class": type(self).__name__, + "efermi": self.efermi, + "run_stats": self.run_stats, + "magnetization": self.magnetization, + "charge": self.charge, + "total_magnetization": self.total_mag, + "nelect": self.nelect, + "is_stopped": self.is_stopped, + "drift": self.drift, + "ngf": self.ngf, + "sampling_radii": self.sampling_radii, + "electrostatic_potential": self.electrostatic_potential, + } + + if self.lepsilon: + dct |= { + "piezo_tensor": self.piezo_tensor, + "dielectric_tensor": self.dielectric_tensor, + "born": self.born, + } + + if self.dfpt: + dct["internal_strain_tensor"] = self.internal_strain_tensor + + if self.dfpt and self.lepsilon: + dct |= { + "piezo_ionic_tensor": self.piezo_ionic_tensor, + "dielectric_ionic_tensor": self.dielectric_ionic_tensor, + } + + if self.lcalcpol: + dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} + if self.spin and not self.noncollinear: + dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} + dct["zval_dict"] = self.zval_dict + + if self.nmr_cs: + dct.update( + nmr_cs={ + "valence and core": self.data["chemical_shielding"]["valence_and_core"], + "valence_only": self.data["chemical_shielding"]["valence_only"], + "g0": self.data["cs_g0_contribution"], + "core": self.data["cs_core_contribution"], + "raw": self.data["unsym_cs_tensor"], + } + ) + + if self.nmr_efg: + dct.update( + nmr_efg={ + "raw": self.data["unsym_efg_tensor"], + "parameters": self.data["efg"], + } + ) + + if self.has_onsite_density_matrices: + # Cast Spin to str for consistency with electronic_structure + # TODO: improve handling of Enum (de)serialization in monty + onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in self.data["onsite_density_matrices"]] + dct["onsite_density_matrices"] = onsite_density_matrices + + return dct + def read_pattern( self, patterns: dict[str, str], @@ -2428,73 +2495,6 @@ def read_table_pattern( self.data[attribute_name] = retained_data return retained_data - def as_dict(self) -> dict[str, Any]: - """MSONable dict.""" - dct = { - "@module": type(self).__module__, - "@class": type(self).__name__, - "efermi": self.efermi, - "run_stats": self.run_stats, - "magnetization": self.magnetization, - "charge": self.charge, - "total_magnetization": self.total_mag, - "nelect": self.nelect, - "is_stopped": self.is_stopped, - "drift": self.drift, - "ngf": self.ngf, - "sampling_radii": self.sampling_radii, - "electrostatic_potential": self.electrostatic_potential, - } - - if self.lepsilon: - dct |= { - "piezo_tensor": self.piezo_tensor, - "dielectric_tensor": self.dielectric_tensor, - "born": self.born, - } - - if self.dfpt: - dct["internal_strain_tensor"] = self.internal_strain_tensor - - if self.dfpt and self.lepsilon: - dct |= { - "piezo_ionic_tensor": self.piezo_ionic_tensor, - "dielectric_ionic_tensor": self.dielectric_ionic_tensor, - } - - if self.lcalcpol: - dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} - if self.spin and not self.noncollinear: - dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} - dct["zval_dict"] = self.zval_dict - - if self.nmr_cs: - dct.update( - nmr_cs={ - "valence and core": self.data["chemical_shielding"]["valence_and_core"], - "valence_only": self.data["chemical_shielding"]["valence_only"], - "g0": self.data["cs_g0_contribution"], - "core": self.data["cs_core_contribution"], - "raw": self.data["unsym_cs_tensor"], - } - ) - - if self.nmr_efg: - dct.update( - nmr_efg={ - "raw": self.data["unsym_efg_tensor"], - "parameters": self.data["efg"], - } - ) - - if self.has_onsite_density_matrices: - # Cast Spin to str for consistency with electronic_structure - # TODO: improve handling of Enum (de)serialization in monty - onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in self.data["onsite_density_matrices"]] - dct["onsite_density_matrices"] = onsite_density_matrices - - return dct - def read_electrostatic_potential(self) -> None: """Parse the eletrostatic potential for the last ionic step. From 875d957d0b97a88274624f3180c018e272769da2 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Tue, 12 Nov 2024 10:59:14 +0800 Subject: [PATCH 15/16] add type, use is to check None --- src/pymatgen/io/vasp/outputs.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index 955e400649e..fbb45f64dbd 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -1952,18 +1952,18 @@ class Outcar: - read_table_pattern Attributes: - magnetization (tuple[dict]): Magnetization on each ion, e.g. + magnetization (tuple[dict[str, float]]): Magnetization on each ion, e.g. ({"d": 0.0, "p": 0.003, "s": 0.002, "tot": 0.005}, ... ). chemical_shielding (dict): Chemical shielding on each ion with core and valence contributions. unsym_cs_tensor (list): Unsymmetrized chemical shielding tensor matrixes on each ion. e.g. [[[sigma11, sigma12, sigma13], [sigma21, sigma22, sigma23], [sigma31, sigma32, sigma33]], ...] cs_g0_contribution (NDArray): G=0 contribution to chemical shielding. 2D rank 3 matrix. - cs_core_contribution (dict): Core contribution to chemical shielding. e.g. + cs_core_contribution (dict[str, float]): Core contribution to chemical shielding. e.g. {'Mg': -412.8, 'C': -200.5, 'O': -271.1} - efg (tuple[dict]): Electric Field Gradient (EFG) tensor on each ion, e.g. + efg (tuple[dict[str, float]]): Electric Field Gradient (EFG) tensor on each ion, e.g. ({"cq": 0.1, "eta", 0.2, "nuclear_quadrupole_moment": 0.3}, {"cq": 0.7, "eta", 0.8, "nuclear_quadrupole_moment": 0.9}, ...) - charge (tuple[dict]): Charge on each ion, e.g. + charge (tuple[dict[str, float]]): Charge on each ion, e.g. ({"p": 0.154, "s": 0.078, "d": 0.0, "tot": 0.232}, ...) is_stopped (bool): True if OUTCAR is from a stopped run (using STOPCAR, see VASP Manual). run_stats (dict[str, float | None]): Various useful run stats including "System time (sec)", @@ -2001,22 +2001,27 @@ def __init__(self, filename: PathLike) -> None: Args: filename (PathLike): OUTCAR file to parse. """ - self.filename = filename - self.is_stopped = False + self.filename: str = str(filename) + self.is_stopped: bool = False # Assume a compilation with parallelization enabled. # Will be checked later. # If VASP is compiled in serial, the OUTCAR is written slightly differently. - serial_compilation = False + serial_compilation: bool = False - # data from end of OUTCAR + # Data from the end of OUTCAR charge = [] mag_x = [] mag_y = [] mag_z = [] header = [] run_stats: dict[str, float | None] = {} - total_mag = nelect = efermi = e_fr_energy = e_wo_entrp = e0 = None + total_mag: float | None = None + nelect: float | None = None + efermi: float | None = None + e_fr_energy: float | None = None + e_wo_entrp: float | None = None + e0: float | None = None time_patt = re.compile(r"\((sec|kb)\)") efermi_patt = re.compile(r"E-fermi\s*:\s*(\S+)") @@ -2066,7 +2071,8 @@ def __init__(self, filename: PathLike) -> None: e_wo_entrp = float(match[1]) if e0 is None and (match := e0_pattern.search(clean)): e0 = float(match[1]) - if all([nelect, total_mag is not None, efermi is not None, run_stats]): + + if nelect is not None and total_mag is not None and efermi is not None and run_stats: break # For single atom systems, VASP doesn't print a total line, so @@ -2391,13 +2397,13 @@ def read_pattern( arguments. Args: - patterns (dict[str, str]): A dict of patterns, e.g. + patterns (dict[str, str]): Patterns, e.g. {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"}. reverse (bool): Read files in reverse. Defaults to false. Useful for - large files, esp OUTCARs, especially when used with + large files like OUTCARs, especially when used with terminate_on_match. terminate_on_match (bool): Whether to terminate when there is at - least one match in each key in pattern. + least one match for each key in patterns. postprocess (Callable): A post processing function to convert all matches. Defaults to str, i.e., no change. From 726c7dadb0dd52419a7e97f5c028b159eac2e40c Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Tue, 12 Nov 2024 11:16:16 +0800 Subject: [PATCH 16/16] enhance type --- src/pymatgen/util/io_utils.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index f386e438508..92ad644e972 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -28,7 +28,7 @@ def clean_lines( remove_empty_lines: bool = True, rstrip_only: bool = False, ) -> Iterator[str]: - """Strips whitespace, carriage returns and empty lines from a list of strings. + """Remove leading and trailing whitespaces from a list of strings. Args: string_list (list[str]): List of strings. @@ -38,10 +38,10 @@ def clean_lines( to retain leading whitespaces). Defaults to False. Yields: - str: clean strings with no whitespaces. + str: clean string with no leading and trailing whitespaces. """ for string in string_list: - clean_string = string + clean_string: str = string if "#" in string: clean_string = string[: string.index("#")] @@ -60,7 +60,7 @@ def micro_pyawk( ) -> Any: """Small awk-mimicking search routine. - This function goes through each line in the file, and if regex matches that + This function goes through each line in the file, and if `regex` matches that line AND test(results, line) is True (OR test is None) we execute run(results, match), where match is the Match object from running Pattern.match. @@ -69,20 +69,20 @@ def micro_pyawk( filename (PathLike): The file to search through. search (list[tuple[Pattern | str, Callable, Callable]]): The "search program" of 3 elements, i.e. [(regex, test, run), ...]. - Here "regex" is either a Pattern object, or a string that we compile + Here `regex` is either a Pattern object, or a string that we compile into a Pattern. results: An object to store results. Default as an empty dictionary. - Passing a results object let you interact with it via "run" and "test". + Passing a results object let you interact with it via `run` and `test`. Hence, in many occasions it is clever to use the instance itself as results. - debug (Callable): Debug "run". - postdebug (Callable): Another "run" after debug "run". + debug (Callable): Debug `run`. + postdebug (Callable): Post debug `run` after debug `run`. Returns: - Any: The results object. + Any: The updated `results` object. Author: Rickard Armiento, Ioannis Petousis """ - # TODO: remove debug and postdebug after 2025-11-09 if no one is opposing + # TODO: remove `debug` and `postdebug` after 2025-11-09 if no one is opposing if debug is not None: warnings.warn("arg debug is scheduled for removal, see PR4160", DeprecationWarning, stacklevel=2) if postdebug is not None: @@ -91,12 +91,14 @@ def micro_pyawk( if results is None: results = {} - # Compile regex strings to Pattern - search = [(re.compile(regex), test, run) for regex, test, run in search] + # Compile regex strings to Patterns + searches: list[tuple[re.Pattern, Callable, Callable]] = [ + (re.compile(regex), test, run) for regex, test, run in search + ] with zopen(filename, mode="rt") as file: for line in file: - for regex, test, run in search: + for regex, test, run in searches: match = re.search(regex, line) if match is not None and (test is None or test(results, line)):