diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py index fac1cacaed8..3db82e306c8 100644 --- a/src/pymatgen/io/vasp/outputs.py +++ b/src/pymatgen/io/vasp/outputs.py @@ -1916,62 +1916,82 @@ def as_dict(self) -> dict: class Outcar: - """Parser for data in OUTCAR that is not available in Vasprun.xml. + """Parser for data in OUTCAR that is not available in vasprun.xml. Note, this class works a bit differently than most of the other - VASP objects, since OUTCAR can be very different depending on which + VASP parsers, since OUTCAR can be very different depending on which "type of run" performed. - Create the OUTCAR class with a filename reads "regular parameters" that - are always present. + Creating an Outcar instance with a filename reads "regular parameters" that + are always present. One can then call a specific reader method depending on the + type of run being performed, including (see the docstring of corresponding + method for more details): + - read_avg_core_poten + - read_chemical_shielding + - read_core_state_eigen + - read_corrections + - read_cs_core_contribution + - read_cs_g0_contribution + - read_cs_raw_symmetrized_tensors + - read_elastic_tensor + - read_electrostatic_potential + - read_fermi_contact_shift + - read_freq_dielectric + - read_igpar + - read_internal_strain_tensor + - read_lcalcpol + - read_lepsilon + - read_lepsilon_ionic + - read_neb + - read_nmr_efg + - read_nmr_efg_tensor + - read_onsite_density_matrices + - read_piezo_tensor + - read_pseudo_zval + - read_table_pattern Attributes: - magnetization (tuple): Magnetization on each ion as a tuple of dict, e.g. - ({"d": 0.0, "p": 0.003, "s": 0.002, "tot": 0.005}, ... ) - chemical_shielding (dict): Chemical shielding on each ion as a dictionary with core and valence contributions. - unsym_cs_tensor (list): Unsymmetrized chemical shielding tensor matrixes on each ion as a list. + magnetization (tuple[dict[str, float]]): Magnetization on each ion, e.g. 
+ ({"d": 0.0, "p": 0.003, "s": 0.002, "tot": 0.005}, ... ). + chemical_shielding (dict): Chemical shielding on each ion with core and valence contributions. + unsym_cs_tensor (list): Unsymmetrized chemical shielding tensor matrixes on each ion. e.g. [[[sigma11, sigma12, sigma13], [sigma21, sigma22, sigma23], [sigma31, sigma32, sigma33]], ...] - cs_g0_contribution (np.array): G=0 contribution to chemical shielding. 2D rank 3 matrix. - cs_core_contribution (dict): Core contribution to chemical shielding. dict. e.g. + cs_g0_contribution (NDArray): G=0 contribution to chemical shielding. 2D rank 3 matrix. + cs_core_contribution (dict[str, float]): Core contribution to chemical shielding. e.g. {'Mg': -412.8, 'C': -200.5, 'O': -271.1} - efg (tuple): Electric Field Gradient (EFG) tensor on each ion as a tuple of dict, e.g. + efg (tuple[dict[str, float]]): Electric Field Gradient (EFG) tensor on each ion, e.g. ({"cq": 0.1, "eta", 0.2, "nuclear_quadrupole_moment": 0.3}, {"cq": 0.7, "eta", 0.8, "nuclear_quadrupole_moment": 0.9}, ...) - charge (tuple): Charge on each ion as a tuple of dict, e.g. + charge (tuple[dict[str, float]]): Charge on each ion, e.g. ({"p": 0.154, "s": 0.078, "d": 0.0, "tot": 0.232}, ...) is_stopped (bool): True if OUTCAR is from a stopped run (using STOPCAR, see VASP Manual). - run_stats (dict): Various useful run stats as a dict including "System time (sec)", "Total CPU time used (sec)", - "Elapsed time (sec)", "Maximum memory used (kb)", "Average memory used (kb)", "User time (sec)", "cores". - elastic_tensor (np.array): Total elastic moduli (Kbar) is given in a 6x6 array matrix. - drift (np.array): Total drift for each step in eV/Atom. + run_stats (dict[str, float | None]): Various useful run stats including "System time (sec)", + "Total CPU time used (sec)", "Elapsed time (sec)", "Maximum memory used (kb)", + "Average memory used (kb)", "User time (sec)", "cores". 
+ elastic_tensor (NDArray): Total elastic moduli (Kbar) is given in a 6x6 array matrix. + drift (NDArray): Total drift for each step in eV/Atom. ngf (tuple): Dimensions for the Augmentation grid. - sampling_radii (np.array): Size of the sampling radii in VASP for the test charges for the electrostatic + sampling_radii (NDArray): Size of the sampling radii in VASP for the test charges for the electrostatic potential at each atom. Total array size is the number of elements present in the calculation. - electrostatic_potential (np.array): Average electrostatic potential at each atomic position in order of + electrostatic_potential (NDArray): Average electrostatic potential at each atomic position in order of the atoms in POSCAR. - final_energy_contribs (dict): Individual contributions to the total final energy as a dictionary. + final_energy_contribs (dict[str, float]): Individual contributions to the total final energy. Include contributions from keys, e.g.: {'DENC': -505778.5184347, 'EATOM': 15561.06492564, 'EBANDS': -804.53201231, 'EENTRO': -0.08932659, 'EXHF': 0.0, 'Ediel_sol': 0.0, 'PAW double counting': 664.6726974100002, 'PSCENC': 742.48691646, 'TEWEN': 489742.86847338, 'XCENC': -169.64189814} efermi (float): Fermi energy. - filename (str): Filename. + filename (PathLike): Filename. final_energy (float): Final energy after extrapolation of sigma back to 0, i.e. energy(sigma->0). final_energy_wo_entrp (float): Final energy before extrapolation of sigma, i.e. energy without entropy. final_fr_energy (float): Final "free energy", i.e. free energy TOTEN. has_onsite_density_matrices (bool): Whether onsite density matrices have been set. lcalcpol (bool): If LCALCPOL has been set. lepsilon (bool): If LEPSILON has been set. - nelect (float): Returns the number of electrons in the calculation. - spin (bool): If spin-polarization was enabled via ISPIN. + nelect (float): The number of electrons in the calculation. + spin (bool): If spin-polarization is enabled via ISPIN. 
total_mag (float): Total magnetization (in terms of the number of unpaired electrons). - One can then call a specific reader depending on the type of run being - performed. These are currently: read_igpar(), read_lepsilon() and - read_lcalcpol(), read_core_state_eign(), read_avg_core_pot(). - - See the documentation of those methods for more documentation. - Authors: Rickard Armiento, Shyue Ping Ong """ @@ -1980,22 +2000,27 @@ def __init__(self, filename: PathLike) -> None: Args: filename (PathLike): OUTCAR file to parse. """ - self.filename = filename - self.is_stopped = False + self.filename: str = str(filename) + self.is_stopped: bool = False # Assume a compilation with parallelization enabled. # Will be checked later. # If VASP is compiled in serial, the OUTCAR is written slightly differently. - serial_compilation = False + serial_compilation: bool = False - # data from end of OUTCAR + # Data from the end of OUTCAR charge = [] mag_x = [] mag_y = [] mag_z = [] header = [] run_stats: dict[str, float | None] = {} - total_mag = nelect = efermi = e_fr_energy = e_wo_entrp = e0 = None + total_mag: float | None = None + nelect: float | None = None + efermi: float | None = None + e_fr_energy: float | None = None + e_wo_entrp: float | None = None + e0: float | None = None time_patt = re.compile(r"\((sec|kb)\)") efermi_patt = re.compile(r"E-fermi\s*:\s*(\S+)") @@ -2045,7 +2070,8 @@ def __init__(self, filename: PathLike) -> None: e_wo_entrp = float(match[1]) if e0 is None and (match := e0_pattern.search(clean)): e0 = float(match[1]) - if all([nelect, total_mag is not None, efermi is not None, run_stats]): + + if nelect is not None and total_mag is not None and efermi is not None and run_stats: break # For single atom systems, VASP doesn't print a total line, so @@ -2136,7 +2162,7 @@ def __init__(self, filename: PathLike) -> None: self.final_fr_energy = e_fr_energy self.data: dict = {} - # Read "total number of plane waves", NPLWV: + # Read NPLWV (total number of plane 
waves) self.read_pattern( {"nplwv": r"total plane-waves NPLWV =\s+(\*{6}|\d+)"}, terminate_on_match=True, @@ -2274,6 +2300,90 @@ def __init__(self, filename: PathLike) -> None: final_energy_contribs[key] = sum(map(float, self.data[key][-1])) self.final_energy_contribs = final_energy_contribs + @staticmethod + def _parse_sci_notation(line: str) -> list[float]: + """ + Parse lines with values in scientific notation and potentially + without spaces in between the values. This assumes that the scientific + notation always lists two digits for the exponent, e.g. 3.535E-02. + + Args: + line: line to parse. + + Returns: + list[float]: numbers if found, empty list if not. + """ + if match := re.findall(r"[\.\-\d]+E[\+\-]\d{2}", line): + return [float(t) for t in match] + return [] + + def as_dict(self) -> dict[str, Any]: + """MSONable dict.""" + dct = { + "@module": type(self).__module__, + "@class": type(self).__name__, + "efermi": self.efermi, + "run_stats": self.run_stats, + "magnetization": self.magnetization, + "charge": self.charge, + "total_magnetization": self.total_mag, + "nelect": self.nelect, + "is_stopped": self.is_stopped, + "drift": self.drift, + "ngf": self.ngf, + "sampling_radii": self.sampling_radii, + "electrostatic_potential": self.electrostatic_potential, + } + + if self.lepsilon: + dct |= { + "piezo_tensor": self.piezo_tensor, + "dielectric_tensor": self.dielectric_tensor, + "born": self.born, + } + + if self.dfpt: + dct["internal_strain_tensor"] = self.internal_strain_tensor + + if self.dfpt and self.lepsilon: + dct |= { + "piezo_ionic_tensor": self.piezo_ionic_tensor, + "dielectric_ionic_tensor": self.dielectric_ionic_tensor, + } + + if self.lcalcpol: + dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} + if self.spin and not self.noncollinear: + dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} + dct["zval_dict"] = self.zval_dict + + if self.nmr_cs: + dct.update( + nmr_cs={ + "valence and core": self.data["chemical_shielding"]["valence_and_core"], 
+ "valence_only": self.data["chemical_shielding"]["valence_only"], + "g0": self.data["cs_g0_contribution"], + "core": self.data["cs_core_contribution"], + "raw": self.data["unsym_cs_tensor"], + } + ) + + if self.nmr_efg: + dct.update( + nmr_efg={ + "raw": self.data["unsym_efg_tensor"], + "parameters": self.data["efg"], + } + ) + + if self.has_onsite_density_matrices: + # Cast Spin to str for consistency with electronic_structure + # TODO: improve handling of Enum (de)serialization in monty + onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in self.data["onsite_density_matrices"]] + dct["onsite_density_matrices"] = onsite_density_matrices + + return dct + def read_pattern( self, patterns: dict[str, str], @@ -2286,32 +2396,32 @@ def read_pattern( arguments. Args: - patterns (dict): A dict of patterns, e.g. + patterns (dict[str, str]): Patterns, e.g. {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"}. reverse (bool): Read files in reverse. Defaults to false. Useful for - large files, esp OUTCARs, especially when used with + large files like OUTCARs, especially when used with terminate_on_match. terminate_on_match (bool): Whether to terminate when there is at - least one match in each key in pattern. + least one match for each key in patterns. postprocess (Callable): A post processing function to convert all matches. Defaults to str, i.e., no change. - Renders accessible: + Renders accessible from self.data: Any attribute in patterns. For example, {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"} will set the value of self.data["energy"] = [[-1234], [-3453], ...], to the - results from regex and postprocess. Note that the returned values - are lists of lists, because you can grep multiple items on one line. + results from regex and postprocess. Note that the values + are list[list], because you can grep multiple items on one line. 
""" matches = regrep( - self.filename, - patterns, + filename=self.filename, + patterns=patterns, reverse=reverse, terminate_on_match=terminate_on_match, postprocess=postprocess, ) - for k in patterns: - self.data[k] = [i[0] for i in matches.get(k, [])] + for key in patterns: + self.data[key] = [i[0] for i in matches.get(key, [])] def read_table_pattern( self, @@ -2322,7 +2432,7 @@ def read_table_pattern( attribute_name: str | None = None, last_one_only: bool = True, first_one_only: bool = False, - ) -> list: + ) -> list: # TODO: clarify table-like data type r"""Parse table-like data. A table composes of three parts: header, main body, footer. All the data matches "row pattern" in the main body will be returned. @@ -2391,7 +2501,14 @@ def read_table_pattern( return retained_data def read_electrostatic_potential(self) -> None: - """Parse the eletrostatic potential for the last ionic step.""" + """Parse the eletrostatic potential for the last ionic step. + + Renders accessible as attributes: + ngf: TODO: double check + radii: TODO: double check + sampling_radii: TODO: double check + electrostatic_potential (list[float]): The eletrostatic potential. 
+ """ pattern = {"ngf": r"\s+dimension x,y,z NGXF=\s+([\.\-\d]+)\sNGYF=\s+([\.\-\d]+)\sNGZF=\s+([\.\-\d]+)"} self.read_pattern(pattern, postprocess=int) self.ngf = self.data.get("ngf", [[]])[0] @@ -2404,35 +2521,21 @@ def read_electrostatic_potential(self) -> None: table_pattern = r"((?:\s+\d+\s*[\.\-\d]+)+)" footer_pattern = r"\s+E-fermi :" - pots: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern) - _pots: str = "".join(itertools.chain.from_iterable(pots)) - - pots = re.findall(r"\s+\d+\s*([\.\-\d]+)+", _pots) + pot_patterns: list = self.read_table_pattern(header_pattern, table_pattern, footer_pattern) + pot_patterns_str: str = "".join(itertools.chain.from_iterable(pot_patterns)) + pots: list = re.findall(r"\s+\d+\s*([\.\-\d]+)+", pot_patterns_str) self.electrostatic_potential = [*map(float, pots)] - @staticmethod - def _parse_sci_notation(line: str) -> list[float]: - """ - Parse lines with values in scientific notation and potentially - without spaces in between the values. This assumes that the scientific - notation always lists two digits for the exponent, e.g. 3.535E-02. - - Args: - line: line to parse. - - Returns: - list[float]: numbers if found, empty if not. - """ - if match := re.findall(r"[\.\-\d]+E[\+\-]\d{2}", line): - return [float(t) for t in match] - return [] - def read_freq_dielectric(self) -> None: """ Parse the frequency dependent dielectric function (obtained with LOPTICS). Frequencies (in eV) are in self.frequencies, and dielectric tensor function is given as self.dielectric_tensor_function. + + Renders accessible as attributes: TODO: + frequencies: + dielectric_tensor_function: """ plasma_pattern = r"plasma frequency squared.*" dielectric_pattern = ( @@ -2491,8 +2594,9 @@ def read_chemical_shielding(self) -> None: """Parse the NMR chemical shieldings data. Only the second part "absolute, valence and core" will be parsed. And only the three right most field (ISO_SHIELDING, SPAN, SKEW) will be retrieved. 
- Set self.data["chemical_shielding"] as: - List of chemical shieldings in the order of atoms from the OUTCAR. Maryland notation is adopted. + Renders accessible from self.data: + chemical_shielding (list): Chemical shieldings in the order of atoms + from the OUTCAR. Maryland notation is adopted. """ header_pattern = ( r"\s+CSA tensor \(J\. Mason, Solid State Nucl\. Magn\. Reson\. 2, " @@ -2523,8 +2627,8 @@ def read_chemical_shielding(self) -> None: def read_cs_g0_contribution(self) -> None: """Parse the G0 contribution of NMR chemical shielding. - Set self.data["cs_g0_contribution"] as: - list[list]: G0 contribution matrix. + Renders accessible from self.data: + cs_g0_contribution (list[list]): G0 contribution matrix. """ header_pattern = ( r"^\s+G\=0 CONTRIBUTION TO CHEMICAL SHIFT \(field along BDIR\)\s+$\n" @@ -2546,8 +2650,8 @@ def read_cs_g0_contribution(self) -> None: def read_cs_core_contribution(self) -> None: """Parse the core contribution of NMR chemical shielding. - Set self.data["cs_core_contribution"] as: - list[list]: G0 contribution matrix. + Renders accessible from self.data: + cs_core_contribution (dict[str, float]): Core contribution to chemical shielding of each element. """ header_pattern = r"^\s+Core NMR properties\s*$\n\n^\s+typ\s+El\s+Core shift \(ppm\)\s*$\n^\s+-{20,}$\n" row_pattern = r"\d+\s+(?P[A-Z][a-z]?\w?)\s+(?P[-]?\d+\.\d+)" @@ -2566,8 +2670,8 @@ def read_cs_core_contribution(self) -> None: def read_cs_raw_symmetrized_tensors(self) -> None: """Parse the matrix form of NMR tensor before corrected to table. - Returns: - nsymmetrized tensors list in the order of atoms. + Renders accessible from self.data: TODO: + unsym_cs_tensor (list[list]): Unsymmetrized tensors in the order of atoms. """ header_pattern = r"\s+-{50,}\s+\s+Absolute Chemical Shift tensors\s+\s+-{50,}$" first_part_pattern = r"\s+UNSYMMETRIZED TENSORS\s+$" @@ -2604,7 +2708,7 @@ def read_nmr_efg_tensor(self) -> list[NDArray]: """Parses the NMR Electric Field Gradient Raw Tensors. 
Returns: - A list of Electric Field Gradient Tensors in the order of Atoms from OUTCAR. + list[NDArray]: Electric Field Gradient Tensors in the order of atoms. """ header_pattern = ( r"Electric field gradients \(V/A\^2\)\n-*\n ion\s+V_xx\s+V_yy\s+V_zz\s+V_xy\s+V_xz\s+V_yz\n-*\n" @@ -2621,9 +2725,9 @@ def read_nmr_efg_tensor(self) -> list[NDArray]: def read_nmr_efg(self) -> None: """Parse the NMR Electric Field Gradient interpreted values. - Set self.data["efg"] as: - Electric Field Gradient tensors as a list of dict in the order of atoms from OUTCAR. - Each dict key/value pair corresponds to a component of the tensors. + Renders accessible from self.data: + efg (list[dict]): Electric Field Gradient tensors in the order of atoms. + Each dict key/value pair corresponds to a component of the tensors. """ header_pattern = ( r"^\s+NMR quadrupolar parameters\s+$\n" @@ -2651,8 +2755,8 @@ def read_elastic_tensor(self) -> None: """ Parse the elastic tensor data. - Set self.data["elastic_tensor"] as: - 6x6 array corresponding to the elastic tensor from the OUTCAR. + Renders accessible from self.data: + elastic_tensor: 6x6 array corresponding to the elastic tensor. """ header_pattern = r"TOTAL ELASTIC MODULI \(kBar\)\s+Direction\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[X-Z][X-Z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) @@ -2661,7 +2765,11 @@ def read_elastic_tensor(self) -> None: self.data["elastic_tensor"] = et_table def read_piezo_tensor(self) -> None: - """Parse the piezo tensor data.""" + """Parse the piezo tensor data. + + Renders accessible from self.data: + piezo_tensor: TODO: fill value type. + """ header_pattern = r"PIEZOELECTRIC TENSOR for field in x, y, z\s+\(C/m\^2\)\s+([X-Z][X-Z]\s+)+\-+" row_pattern = r"[x-z]\s+" + r"\s+".join([r"(\-*[\.\d]+)"] * 6) footer_pattern = r"BORN EFFECTIVE" @@ -2671,8 +2779,8 @@ def read_piezo_tensor(self) -> None: def read_onsite_density_matrices(self) -> None: """Parse the onsite density matrices. 
- Set self.data["onsite_density_matrices"] as: - List with index corresponding to atom index in Structure. + Renders accessible from self.data: TODO: + onsite_density_matrices (list[dict]): List with index corresponding to atom index in Structure. """ # Matrix size will vary depending on if d or f orbitals are present. # Therefore regex assumes f, but filter out None values if d. @@ -2713,12 +2821,14 @@ def read_corrections( reverse: bool = True, terminate_on_match: bool = True, ) -> None: - """Read the dipol qudropol corrections into - self.data["dipol_quadrupol_correction"]. + """Read the dipole and quadrupole energy corrections. Args: reverse (bool): Whether to start from end of OUTCAR. Defaults to True. terminate_on_match (bool): Whether to terminate once match is found. Defaults to True. + + Renders accessible from self.data: + dipol_quadrupol_correction: TODO: fill details. """ patterns = {"dipol_quadrupol_correction": r"dipol\+quadrupol energy correction\s+([\d\-\.]+)"} self.read_pattern( @@ -2741,17 +2851,15 @@ def read_neb( Args: reverse (bool): Read files in reverse. Defaults to false. Useful for - large files, esp OUTCARs, especially when used with - terminate_on_match. Defaults to True here since we usually - want only the final value. + large files, especially when used with terminate_on_match. + Defaults to True here since we usually want only the final value. terminate_on_match (bool): Whether to terminate when there is at least one match in each key in pattern. Defaults to True here since we usually want only the final value. - Renders accessible: - tangent_force - Final tangent force. - energy - Final energy. - These can be accessed under Outcar.data[key] + Renders accessible from self.data: + tangent_force (float): Final tangent force. + energy (float): Final energy. 
""" patterns = { "energy": r"energy\(sigma->0\)\s+=\s+([\d\-\.]+)", @@ -2774,17 +2882,18 @@ def read_igpar(self) -> None: See VASP sections "LBERRY, IGPAR, NPPSTR, DIPOL" for info on what these are. - Renders accessible: - er_ev = e_ev (dictionary with Spin.up/Spin.down as keys) - er_bp = e_bp (dictionary with Spin.up/Spin.down as keys) - er_ev_tot = spin up + spin down summed - er_bp_tot = spin up + spin down summed - p_elc = spin up + spin down summed - p_ion = spin up + spin down summed. + Renders accessible as attributes: # TODO: double check type + er_ev (dict): e_ev (Spin.up/Spin.down as keys). + er_bp (dict): e_bp (Spin.up/Spin.down as keys). + er_ev_tot: spin up + spin down summed. + er_bp_tot: spin up + spin down summed. + p_elec (int): spin up + spin down summed. + p_ion (int): spin up + spin down summed. """ # Variables to be filled - self.er_ev = {} # dict (Spin.up/down) of array(3*float) - self.er_bp = {} # dict (Spin.up/down) of array(3*float) + # TODO: double check type + self.er_ev: dict = {} # (Spin.up/down) of array(3*float) + self.er_bp: dict = {} # (Spin.up/down) of array(3*float) self.er_ev_tot = None # array(3*float) self.er_bp_tot = None # array(3*float) self.p_elec: int | None = None @@ -2879,10 +2988,12 @@ def p_ion(results, match): except Exception as exc: raise RuntimeError("IGPAR OUTCAR could not be parsed.") from exc - def read_internal_strain_tensor(self): - """Read the internal strain tensor and populates - self.internal_strain_tensor with an array of voigt notation - tensors for each site. + def read_internal_strain_tensor(self) -> None: + """Read the internal strain tensor. + + Renders accessible as attributes: + # TODO: add type + internal_strain_tensor: an array of voigt notation tensors for each site. """ search = [] @@ -2928,7 +3039,8 @@ def internal_strain_data(results, match: str) -> None: def read_lepsilon(self) -> None: """Read a LEPSILON run. - TODO: Document the actual variables. 
+ Renders accessible as attributes: + TODO: """ try: search = [] @@ -3084,7 +3196,8 @@ def born_section_stop(results, _match): def read_lepsilon_ionic(self) -> None: """Read the ionic component of a LEPSILON run. - TODO: Document the actual variables. + Renders accessible as attributes: + TODO: """ try: search = [] @@ -3210,7 +3323,8 @@ def piezo_section_stop(results, _match): def read_lcalcpol(self) -> None: """Read the LCALCPOL. - TODO: Document the actual variables. + Renders accessible as attributes: + TODO: """ self.p_elec = None self.p_sp1: int | None = None @@ -3313,7 +3427,11 @@ def p_ion(results, match): raise RuntimeError("LCALCPOL OUTCAR could not be parsed.") from exc def read_pseudo_zval(self) -> None: - """Create a pseudopotential ZVAL dictionary.""" + """Create a pseudopotential ZVAL dictionary. + + Renders accessible as attributes: + TODO: + """ try: def atom_symbols(results, match): @@ -3339,6 +3457,7 @@ def zvals(results, match): self.zval_dict = dict(zip(self.atom_symbols, self.zvals, strict=True)) # type: ignore[attr-defined] # Clean up + # TODO: is del necessary (need benchmark)? del self.atom_symbols # type: ignore[attr-defined] del self.zvals # type: ignore[attr-defined] @@ -3349,7 +3468,8 @@ def read_core_state_eigen(self) -> list[dict]: """Read the core state eigenenergies at each ionic step. Returns: - A list of dict over the atom such as [{"AO":[core state eig]}]. + list[dict]: The atom such as [{"AO":[core state eig]}]. + # TODO: what is "[core state eig]"? array or "core_state_eig" likely the latter The core state eigenenergie list for each AO is over all ionic step. @@ -3392,8 +3512,9 @@ def read_avg_core_poten(self) -> list[list]: """Read the core potential at each ionic step. Returns: - A list for each ionic step containing a list of the average core - potentials for each atom: [[avg core pot]]. + list[list]: A list for each ionic step containing a list of + the average core potentials for each atom: [[avg core pot]]. 
+ TODO: what is "[avg core pot]", is it an array of 3 or "avg_core_pot"? Example: The average core potential of the 2nd atom of the structure at the @@ -3426,73 +3547,6 @@ def read_avg_core_poten(self) -> list[list]: return aps - def as_dict(self) -> dict: - """MSONable dict.""" - dct = { - "@module": type(self).__module__, - "@class": type(self).__name__, - "efermi": self.efermi, - "run_stats": self.run_stats, - "magnetization": self.magnetization, - "charge": self.charge, - "total_magnetization": self.total_mag, - "nelect": self.nelect, - "is_stopped": self.is_stopped, - "drift": self.drift, - "ngf": self.ngf, - "sampling_radii": self.sampling_radii, - "electrostatic_potential": self.electrostatic_potential, - } - - if self.lepsilon: - dct |= { - "piezo_tensor": self.piezo_tensor, - "dielectric_tensor": self.dielectric_tensor, - "born": self.born, - } - - if self.dfpt: - dct["internal_strain_tensor"] = self.internal_strain_tensor - - if self.dfpt and self.lepsilon: - dct |= { - "piezo_ionic_tensor": self.piezo_ionic_tensor, - "dielectric_ionic_tensor": self.dielectric_ionic_tensor, - } - - if self.lcalcpol: - dct |= {"p_elec": self.p_elec, "p_ion": self.p_ion} - if self.spin and not self.noncollinear: - dct |= {"p_sp1": self.p_sp1, "p_sp2": self.p_sp2} - dct["zval_dict"] = self.zval_dict - - if self.nmr_cs: - dct.update( - nmr_cs={ - "valence and core": self.data["chemical_shielding"]["valence_and_core"], - "valence_only": self.data["chemical_shielding"]["valence_only"], - "g0": self.data["cs_g0_contribution"], - "core": self.data["cs_core_contribution"], - "raw": self.data["unsym_cs_tensor"], - } - ) - - if self.nmr_efg: - dct.update( - nmr_efg={ - "raw": self.data["unsym_efg_tensor"], - "parameters": self.data["efg"], - } - ) - - if self.has_onsite_density_matrices: - # Cast Spin to str for consistency with electronic_structure - # TODO: improve handling of Enum (de)serialization in monty - onsite_density_matrices = [{str(k): v for k, v in d.items()} for d in 
self.data["onsite_density_matrices"]] - dct["onsite_density_matrices"] = onsite_density_matrices - - return dct - def read_fermi_contact_shift(self) -> None: """Read Fermi contact (isotropic) hyperfine coupling parameter. diff --git a/src/pymatgen/util/io_utils.py b/src/pymatgen/util/io_utils.py index f8c7d268f43..92ad644e972 100644 --- a/src/pymatgen/util/io_utils.py +++ b/src/pymatgen/util/io_utils.py @@ -2,14 +2,17 @@ from __future__ import annotations -import os import re +import warnings from typing import TYPE_CHECKING from monty.io import zopen if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable, Iterator + from typing import Any + + from pymatgen.util.typing import PathLike __author__ = "Shyue Ping Ong, Rickard Armiento, Anubhav Jain, G Matteo, Ioannis Petousis" __copyright__ = "Copyright 2011, The Materials Project" @@ -25,7 +28,7 @@ def clean_lines( remove_empty_lines: bool = True, rstrip_only: bool = False, ) -> Iterator[str]: - """Strips whitespace, carriage returns and empty lines from a list of strings. + """Remove leading and trailing whitespaces from a list of strings. Args: string_list (list[str]): List of strings. @@ -35,10 +38,10 @@ def clean_lines( to retain leading whitespaces). Defaults to False. Yields: - str: clean strings with no whitespaces. + str: clean string with no leading and trailing whitespaces. """ for string in string_list: - clean_string = string + clean_string: str = string if "#" in string: clean_string = string[: string.index("#")] @@ -48,52 +51,62 @@ def clean_lines( yield clean_string -def micro_pyawk(filename, search, results=None, debug=None, postdebug=None): +def micro_pyawk( + filename: PathLike, + search: list[tuple[re.Pattern | str, Callable, Callable]], + results: Any | None = None, + debug: Callable | None = None, + postdebug: Callable | None = None, +) -> Any: """Small awk-mimicking search routine. - 'file' is file to search through. 
- 'search' is the "search program", a list of lists/tuples with 3 elements; - i.e. [[regex, test, run], [regex, test, run], ...] - 'results' is a an object that your search program will have access to for - storing results. - - Here regex is either as a Regex object, or a string that we compile into a - Regex. test and run are callable objects. + This function goes through each line in the file, and if `regex` matches that + line AND test(results, line) is True (OR test is None) we execute + run(results, match), where match is the Match object from running + Pattern.search. - This function goes through each line in filename, and if regex matches that - line *and* test(results,line)==True (or test is None) we execute - run(results,match), where match is the match object from running - Regex.match. + Args: + filename (PathLike): The file to search through. + search (list[tuple[Pattern | str, Callable, Callable]]): The "search program", a list of + 3-element tuples, i.e. [(regex, test, run), ...]. + Here `regex` is either a Pattern object, or a string that we compile + into a Pattern; `test` may be None. + results: An object to store results. Defaults to an empty dictionary. + Passing a results object lets you interact with it via `run` and `test`. + Hence, on many occasions it is clever to use the instance itself as results. + debug (Callable): Deprecated. Called as debug(results, match) before `run`. + postdebug (Callable): Deprecated. Called as postdebug(results, match) after `run`. - The default results is an empty dictionary. Passing a results object let - you interact with it in run() and test(). Hence, in many occasions it is - thus clever to use results=self. + Returns: + Any: The updated `results` object. Author: Rickard Armiento, Ioannis Petousis - - Returns: - dict[str, Any]: The results dictionary. 
""" + # TODO: remove `debug` and `postdebug` after 2025-11-09 if no one is opposing + if debug is not None: + warnings.warn("arg debug is scheduled for removal, see PR4160", DeprecationWarning, stacklevel=2) + if postdebug is not None: + warnings.warn("arg postdebug is scheduled for removal, see PR4160", DeprecationWarning, stacklevel=2) + if results is None: results = {} - # Compile regex strings - for entry in search: - entry[0] = re.compile(entry[0]) + # Compile regex strings to Patterns + searches: list[tuple[re.Pattern, Callable, Callable]] = [ + (re.compile(regex), test, run) for regex, test, run in search + ] with zopen(filename, mode="rt") as file: for line in file: - for entry in search: - match = re.search(entry[0], line) - if match and (entry[1] is None or entry[1](results, line)): + for regex, test, run in searches: + match = re.search(regex, line) + + if match is not None and (test is None or test(results, line)): if debug is not None: debug(results, match) - entry[2](results, match) + + run(results, match) if postdebug is not None: postdebug(results, match) return results - - -umask = os.umask(0) -os.umask(umask)