MDAnalysis · orbeckst · Apr 8, 2025 · Mar 27, 2025 · Mar 28, 2025 · Mar 29, 2025
diff --git a/package/AUTHORS b/package/AUTHORS
@@ -252,6 +252,8 @@ Chronological list of authors
   - Lexi Xu
   - BHM-Bob G  
   - Yu-Yuan (Stuart) Yang
+  - James Rowe  
+
 
 External code
 -------------

diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py
@@ -98,12 +98,15 @@
     Bonds,
     Charges,
     Dihedrals,
+    Elements,
     Impropers,
     Masses,
     Resids,
     Resnums,
     Segids,
 )
+from ..guesser.tables import SYMB2Z
+from ..guesser.tables import masses as mass_table
 
 logger = logging.getLogger("MDAnalysis.topology.LAMMPS")
 
@@ -602,6 +605,18 @@ def _parse_box(self, header):
         return unitcell
 
 
+# Define the headers that define topology information from the lammps file
+# Any headers outside of this set will be ignored
+DUMP_HEADERS = {
+    "id": {"attr_class": Atomids, "default": None, "dtype": np.int32},
+    "mol": {"attr_class": [Resids, Resnums], "default": 1, "dtype": int},
+    "type": {"attr_class": Atomtypes, "default": 1, "dtype": object},
+    "mass": {"attr_class": Masses, "default": 1.0, "dtype": np.float64},
+    "element": {"attr_class": Elements, "default": None, "dtype": object},
+    "q": {"attr_class": Charges, "default": None, "dtype": np.float32},
+}
+
+
 class LammpsDumpParser(TopologyReaderBase):
     """Parses Lammps ascii dump files in 'atom' format.
 
@@ -627,33 +642,86 @@ def parse(self, **kwargs):
             fin.readline()  # y
             fin.readline()  # z
 
-            indices = np.zeros(natoms, dtype=int)
-            types = np.zeros(natoms, dtype=object)
-
+            # Next line contains the headers for the atom data
             atomline = fin.readline()  # ITEM ATOMS
             attrs = atomline.split()[2:]  # attributes on coordinate line
-            col_ids = {attr: i for i, attr in enumerate(attrs)}  # column ids
-
+            col_ids = {
+                attr: i for i, attr in enumerate(attrs) if attr in DUMP_HEADERS
+            }  # column ids
+            if "id" not in col_ids:
+                raise ValueError("No id column found in dump file")
+            if "mass" not in col_ids:
+                if "element" in col_ids:
+                    warnings.warn(
+                        "No mass column found in dump file. "
+                        "Using guessed masses from element info."
+                    )
+                else:
+                    warnings.warn("Guessed all Masses to 1.0")
+            if "type" not in col_ids:
+                warnings.warn("Set all atom types to 1")
+
+            atom_data = {}
+            # Initialize the atom data arrays
+            for header in DUMP_HEADERS:
+                if header in col_ids:
+                    atom_data[header] = np.zeros(
+                        natoms, dtype=DUMP_HEADERS[header]["dtype"]
+                    )
+                elif DUMP_HEADERS[header]["default"] is not None:
+                    atom_data[header] = np.full(
+                        natoms,
+                        DUMP_HEADERS[header]["default"],
+                        dtype=DUMP_HEADERS[header]["dtype"],
+                    )
+            # Read the atom data
             for i in range(natoms):
                 fields = fin.readline().split()
+                for header, col_id in col_ids.items():
+                    atom_data[header][i] = fields[col_id]
+
+        # Check for valid elements and set masses by element if not given
+        if "element" in col_ids:
+            validated_elements = []
+            for elem in atom_data["element"]:
+                if elem.capitalize() in SYMB2Z:
+                    validated_elements.append(elem.capitalize())
+                else:
+                    wmsg = (
+                        f"Unknown element {elem} found for some atoms. "
+                        f"These have been given an empty element record. "
+                    )
+                    warnings.warn(wmsg)
+                    validated_elements.append("")
+            atom_data["element"] = np.array(
+                validated_elements, dtype=DUMP_HEADERS["element"]["dtype"]
+            )
+            if "mass" not in col_ids:
+                for i, elem in enumerate(validated_elements):
+                    try:
+                        atom_data["mass"][i] = mass_table[elem]
+                    except KeyError:
+                        atom_data["mass"][i] = 1.0
 
-                indices[i] = fields[col_ids["id"]]
-                types[i] = fields[col_ids["type"]]
-
-        order = np.argsort(indices)
-        indices = indices[order]
-        types = types[order]
+        # Reorder the atom data by id
+        order = np.argsort(atom_data["id"])
 
         attrs = []
-        attrs.append(Atomids(indices))
-        attrs.append(Atomtypes(types))
-        attrs.append(Masses(np.ones(natoms, dtype=np.float64), guessed=True))
-        warnings.warn("Guessed all Masses to 1.0")
-        attrs.append(Resids(np.array([1], dtype=int)))
-        attrs.append(Resnums(np.array([1], dtype=int)))
-        attrs.append(Segids(np.array(["SYSTEM"], dtype=object)))
+        for key, value in atom_data.items():
+            if key == "mol":
+                # Get the number of unique residues
+                # and the assignment of each atom to a residue
+                residx, resids = squash_by(value[order])[:2]
+                n_residues = len(resids)
+                for attr_class in DUMP_HEADERS[key]["attr_class"]:
+                    attrs.append(attr_class(resids))
+            else:
+                attrs.append(DUMP_HEADERS[key]["attr_class"](value[order]))
 
-        return Topology(natoms, 1, 1, attrs=attrs)
+        attrs.append(Segids(np.array(["SYSTEM"], dtype=object)))
+        return Topology(
+            natoms, n_residues, 1, attrs=attrs, atom_resindex=residx
+        )
 
 
 @functools.total_ordering

diff --git a/testsuite/MDAnalysisTests/data/lammps/mass_q_elem.lammpstrj b/testsuite/MDAnalysisTests/data/lammps/mass_q_elem.lammpstrj
@@ -0,0 +1,39 @@
+ITEM: TIMESTEP
+0
+ITEM: NUMBER OF ATOMS
+30
+ITEM: BOX BOUNDS pp pp pp
+-1.6350000000000001e-01 3.8836500000000001e+01
+-1.7050000000000001e-01 3.8829500000000003e+01
+-1.1550000000000001e-01 9.4884500000000003e+01
+ITEM: ATOMS id mol type x y z element q proc mass
+1 1 9 6.10782 11.4384 0.798271 C -0.27 0 12.011
+2 1 6 5.73938 10.7721 1.60738 H 0.09 0 1.008
+3 1 6 5.36179 11.4598 -0.0247166 H 0.09 0 1.008
+4 1 6 6.22277 12.4656 1.20551 H 0.09 0 1.008
+5 1 7 7.41283 10.9555 0.293156 C 0.51 0 12.011
+6 1 15 8.43683 11.6139 0.447366 O -0.51 0 15.9994
+25 2 5 11.6777 7.51021 91.0484 H 0.09 0 1.008
+7 1 14 7.41086 9.75757 94.689 N -0.47 0 14.007
+8 1 1 6.55183 9.26307 94.5816 H 0.31 0 1.008
+11 1 3 9.37844 9.17237 94.7817 H 0.09 0 1.008
+16 2 5 10.6148 7.68449 93.1683 H 0.09 0 1.008
+10 1 3 8.30811 8.18016 93.7248 H 0.09 0 1.008
+9 1 8 8.58568 9.17251 94.0482 C -0.02 0 12.011
+12 1 7 9.07397 9.91809 92.8201 C 0.51 0 12.011
+13 1 15 8.35715 10.7673 92.2861 O -0.51 0 15.9994
+14 2 13 10.274 9.63057 92.3198 N -0.29 0 14.007
+15 2 12 11.1655 8.59754 92.8563 C 0 0 12.011
+17 2 5 11.7158 9.02714 93.7206 H 0.09 0 1.008
+18 2 10 10.7877 10.2091 91.0804 C 0.02 0 12.011
+19 2 2 10.8053 11.2774 91.238 H 0.09 0 1.008
+20 2 11 12.2025 9.60943 90.953 C -0.18 0 12.011
+21 2 5 12.9179 10.2786 91.4772 H 0.09 0 1.008
+22 2 5 12.5386 9.49027 89.9008 H 0.09 0 1.008
+23 2 11 12.127 8.282 91.7093 C -0.18 0 12.011
+24 2 5 13.1195 7.93535 92.0685 H 0.09 0 1.008
+26 2 7 9.90917 9.92876 89.854 C 0.51 0 12.011
+27 2 15 9.25139 8.88177 89.8474 O -0.51 0 15.9994
+28 3 13 9.84845 10.7872 88.8235 N -0.29 0 14.007
+29 3 12 10.561 12.0708 88.7803 C 0 0 12.011
+30 3 5 10.318 12.7002 89.6629 H 0.09 0 1.008
diff --git a/testsuite/MDAnalysisTests/data/lammps/nomass_elemx.lammpstrj b/testsuite/MDAnalysisTests/data/lammps/nomass_elemx.lammpstrj
@@ -0,0 +1,14 @@
+ITEM: TIMESTEP
+0
+ITEM: NUMBER OF ATOMS
+5
+ITEM: BOX BOUNDS pp pp pp
+-1.6350000000000001e-01 3.8836500000000001e+01
+-1.7050000000000001e-01 3.8829500000000003e+01
+-1.1550000000000001e-01 9.4884500000000003e+01
+ITEM: ATOMS id mol type x y z q element
+1 1 9 6.10782 11.4384 0.798271 -0.27 C
+2 1 6 5.73938 10.7721 1.60738 0.09 X
+3 1 6 5.36179 11.4598 -0.0247166 0.09 H
+4 1 6 6.22277 12.4656 1.20551 0.09 H
+5 1 7 7.41283 10.9555 0.293156 0.51 C
diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py
@@ -262,6 +262,8 @@
     "LAMMPSdata_additional_columns",  # structure for the additional column lammpstrj
     "LAMMPSDUMP",
     "LAMMPSDUMP_long",  # lammpsdump file with a few zeros sprinkled in the first column first frame
+    "LAMMPSDUMP_allinfo",  # lammpsdump file with resids, masses, charges and element symbols
+    "LAMMPSDUMP_nomass_elemx",  # lammps dump file with no masses, but with element symbols and one symbol 'X'
     "LAMMPSDUMP_allcoords",  # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example
     "LAMMPSDUMP_nocoords",  # lammpsdump file with no coordinates
     "LAMMPSDUMP_triclinic",  # lammpsdump file to test triclinic dimension parsing, albite with most atoms deleted
@@ -753,6 +755,10 @@
 LAMMPSdata_PairIJ = (_data_ref / "lammps/pairij_coeffs.data.bz2").as_posix()
 LAMMPSDUMP = (_data_ref / "lammps/wat.lammpstrj.bz2").as_posix()
 LAMMPSDUMP_long = (_data_ref / "lammps/wat.lammpstrj_long.bz2").as_posix()
+LAMMPSDUMP_allinfo = (_data_ref / "lammps/mass_q_elem.lammpstrj").as_posix()
+LAMMPSDUMP_nomass_elemx = (
+    _data_ref / "lammps/nomass_elemx.lammpstrj"
+).as_posix()
 LAMMPSDUMP_allcoords = (
     _data_ref / "lammps/spce_all_coords.lammpstrj.bz2"
 ).as_posix()