Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated LAMMPS dump file topology parser #4995

Merged
merged 10 commits into from
Apr 8, 2025
2 changes: 2 additions & 0 deletions package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ Chronological list of authors
- Lexi Xu
- BHM-Bob G
- Yu-Yuan (Stuart) Yang
- James Rowe


External code
-------------
Expand Down
106 changes: 87 additions & 19 deletions package/MDAnalysis/topology/LAMMPSParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,15 @@
Bonds,
Charges,
Dihedrals,
Elements,
Impropers,
Masses,
Resids,
Resnums,
Segids,
)
from ..guesser.tables import SYMB2Z
from ..guesser.tables import masses as mass_table

logger = logging.getLogger("MDAnalysis.topology.LAMMPS")

Expand Down Expand Up @@ -602,6 +605,18 @@ def _parse_box(self, header):
return unitcell


# Define the headers that define topology information from the lammps file
# Any headers outside of this set will be ignored
DUMP_HEADERS = {
"id": {"attr_class": Atomids, "default": None, "dtype": np.int32},
"mol": {"attr_class": [Resids, Resnums], "default": 1, "dtype": int},
"type": {"attr_class": Atomtypes, "default": 1, "dtype": object},
"mass": {"attr_class": Masses, "default": 1.0, "dtype": np.float64},
"element": {"attr_class": Elements, "default": None, "dtype": object},
"q": {"attr_class": Charges, "default": None, "dtype": np.float32},
}


class LammpsDumpParser(TopologyReaderBase):
"""Parses Lammps ascii dump files in 'atom' format.

Expand All @@ -627,33 +642,86 @@ def parse(self, **kwargs):
fin.readline() # y
fin.readline() # z

indices = np.zeros(natoms, dtype=int)
types = np.zeros(natoms, dtype=object)

# Next line contains the headers for the atom data
atomline = fin.readline() # ITEM ATOMS
attrs = atomline.split()[2:] # attributes on coordinate line
col_ids = {attr: i for i, attr in enumerate(attrs)} # column ids

col_ids = {
attr: i for i, attr in enumerate(attrs) if attr in DUMP_HEADERS
} # column ids
if "id" not in col_ids:
raise ValueError("No id column found in dump file")
if "mass" not in col_ids:
if "element" in col_ids:
warnings.warn(
"No mass column found in dump file. "
"Using guessed masses from element info."
)
else:
warnings.warn("Guessed all Masses to 1.0")
if "type" not in col_ids:
warnings.warn("Set all atom types to 1")

atom_data = {}
# Initialize the atom data arrays
for header in DUMP_HEADERS:
if header in col_ids:
atom_data[header] = np.zeros(
natoms, dtype=DUMP_HEADERS[header]["dtype"]
)
elif DUMP_HEADERS[header]["default"] is not None:
atom_data[header] = np.full(
natoms,
DUMP_HEADERS[header]["default"],
dtype=DUMP_HEADERS[header]["dtype"],
)
# Read the atom data
for i in range(natoms):
fields = fin.readline().split()
for header, col_id in col_ids.items():
atom_data[header][i] = fields[col_id]

# Check for valid elements and set masses by element if not given
if "element" in col_ids:
validated_elements = []
for elem in atom_data["element"]:
if elem.capitalize() in SYMB2Z:
validated_elements.append(elem.capitalize())
else:
wmsg = (
f"Unknown element {elem} found for some atoms. "
f"These have been given an empty element record. "
)
warnings.warn(wmsg)
validated_elements.append("")
atom_data["element"] = np.array(
validated_elements, dtype=DUMP_HEADERS["element"]["dtype"]
)
if "mass" not in col_ids:
for i, elem in enumerate(validated_elements):
try:
atom_data["mass"][i] = mass_table[elem]
except KeyError:
atom_data["mass"][i] = 1.0

indices[i] = fields[col_ids["id"]]
types[i] = fields[col_ids["type"]]

order = np.argsort(indices)
indices = indices[order]
types = types[order]
# Reorder the atom data by id
order = np.argsort(atom_data["id"])

attrs = []
attrs.append(Atomids(indices))
attrs.append(Atomtypes(types))
attrs.append(Masses(np.ones(natoms, dtype=np.float64), guessed=True))
warnings.warn("Guessed all Masses to 1.0")
attrs.append(Resids(np.array([1], dtype=int)))
attrs.append(Resnums(np.array([1], dtype=int)))
attrs.append(Segids(np.array(["SYSTEM"], dtype=object)))
for key, value in atom_data.items():
if key == "mol":
# Get the number of unique residues
# and the assignment of each atom to a residue
residx, resids = squash_by(value[order])[:2]
n_residues = len(resids)
for attr_class in DUMP_HEADERS[key]["attr_class"]:
attrs.append(attr_class(resids))
else:
attrs.append(DUMP_HEADERS[key]["attr_class"](value[order]))

return Topology(natoms, 1, 1, attrs=attrs)
attrs.append(Segids(np.array(["SYSTEM"], dtype=object)))
return Topology(
natoms, n_residues, 1, attrs=attrs, atom_resindex=residx
)


@functools.total_ordering
Expand Down
39 changes: 39 additions & 0 deletions testsuite/MDAnalysisTests/data/lammps/mass_q_elem.lammpstrj
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
ITEM: TIMESTEP
0
ITEM: NUMBER OF ATOMS
30
ITEM: BOX BOUNDS pp pp pp
-1.6350000000000001e-01 3.8836500000000001e+01
-1.7050000000000001e-01 3.8829500000000003e+01
-1.1550000000000001e-01 9.4884500000000003e+01
ITEM: ATOMS id mol type x y z element q proc mass
1 1 9 6.10782 11.4384 0.798271 C -0.27 0 12.011
2 1 6 5.73938 10.7721 1.60738 H 0.09 0 1.008
3 1 6 5.36179 11.4598 -0.0247166 H 0.09 0 1.008
4 1 6 6.22277 12.4656 1.20551 H 0.09 0 1.008
5 1 7 7.41283 10.9555 0.293156 C 0.51 0 12.011
6 1 15 8.43683 11.6139 0.447366 O -0.51 0 15.9994
25 2 5 11.6777 7.51021 91.0484 H 0.09 0 1.008
7 1 14 7.41086 9.75757 94.689 N -0.47 0 14.007
8 1 1 6.55183 9.26307 94.5816 H 0.31 0 1.008
11 1 3 9.37844 9.17237 94.7817 H 0.09 0 1.008
16 2 5 10.6148 7.68449 93.1683 H 0.09 0 1.008
10 1 3 8.30811 8.18016 93.7248 H 0.09 0 1.008
9 1 8 8.58568 9.17251 94.0482 C -0.02 0 12.011
12 1 7 9.07397 9.91809 92.8201 C 0.51 0 12.011
13 1 15 8.35715 10.7673 92.2861 O -0.51 0 15.9994
14 2 13 10.274 9.63057 92.3198 N -0.29 0 14.007
15 2 12 11.1655 8.59754 92.8563 C 0 0 12.011
17 2 5 11.7158 9.02714 93.7206 H 0.09 0 1.008
18 2 10 10.7877 10.2091 91.0804 C 0.02 0 12.011
19 2 2 10.8053 11.2774 91.238 H 0.09 0 1.008
20 2 11 12.2025 9.60943 90.953 C -0.18 0 12.011
21 2 5 12.9179 10.2786 91.4772 H 0.09 0 1.008
22 2 5 12.5386 9.49027 89.9008 H 0.09 0 1.008
23 2 11 12.127 8.282 91.7093 C -0.18 0 12.011
24 2 5 13.1195 7.93535 92.0685 H 0.09 0 1.008
26 2 7 9.90917 9.92876 89.854 C 0.51 0 12.011
27 2 15 9.25139 8.88177 89.8474 O -0.51 0 15.9994
28 3 13 9.84845 10.7872 88.8235 N -0.29 0 14.007
29 3 12 10.561 12.0708 88.7803 C 0 0 12.011
30 3 5 10.318 12.7002 89.6629 H 0.09 0 1.008
14 changes: 14 additions & 0 deletions testsuite/MDAnalysisTests/data/lammps/nomass_elemx.lammpstrj
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
ITEM: TIMESTEP
0
ITEM: NUMBER OF ATOMS
5
ITEM: BOX BOUNDS pp pp pp
-1.6350000000000001e-01 3.8836500000000001e+01
-1.7050000000000001e-01 3.8829500000000003e+01
-1.1550000000000001e-01 9.4884500000000003e+01
ITEM: ATOMS id mol type x y z q element
1 1 9 6.10782 11.4384 0.798271 -0.27 C
2 1 6 5.73938 10.7721 1.60738 0.09 X
3 1 6 5.36179 11.4598 -0.0247166 0.09 H
4 1 6 6.22277 12.4656 1.20551 0.09 H
5 1 7 7.41283 10.9555 0.293156 0.51 C
6 changes: 6 additions & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@
"LAMMPSdata_additional_columns", # structure for the additional column lammpstrj
"LAMMPSDUMP",
"LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame
"LAMMPSDUMP_allinfo", # lammpsdump file with resids, masses, charges and element symbols
"LAMMPSDUMP_nomass_elemx", # lammps dump file with no masses, but with element symbols and one symbol 'X'
"LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example
"LAMMPSDUMP_nocoords", # lammpsdump file with no coordinates
"LAMMPSDUMP_triclinic", # lammpsdump file to test triclinic dimension parsing, albite with most atoms deleted
Expand Down Expand Up @@ -753,6 +755,10 @@
LAMMPSdata_PairIJ = (_data_ref / "lammps/pairij_coeffs.data.bz2").as_posix()
LAMMPSDUMP = (_data_ref / "lammps/wat.lammpstrj.bz2").as_posix()
LAMMPSDUMP_long = (_data_ref / "lammps/wat.lammpstrj_long.bz2").as_posix()
LAMMPSDUMP_allinfo = (_data_ref / "lammps/mass_q_elem.lammpstrj").as_posix()
LAMMPSDUMP_nomass_elemx = (
_data_ref / "lammps/nomass_elemx.lammpstrj"
).as_posix()
LAMMPSDUMP_allcoords = (
_data_ref / "lammps/spce_all_coords.lammpstrj.bz2"
).as_posix()
Expand Down
Loading