Skip to content

Commit eedb32b

Browse files
committed
FIX: in chunked_processing, replicate replicable tensors on the fly for each chunk instead of replicating them to full length before calling the function, to reduce memory usage
1 parent 4955f43 commit eedb32b

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

src/ptychi/data_structures/object.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -561,8 +561,6 @@ def calculate_illumination_map(
561561
probe_int = probe.get_all_mode_intensity(opr_mode=0)[None, :, :]
562562
else:
563563
probe_int = probe.get_mode_and_opr_mode(mode=0, opr_mode=0)[None, ...].abs() ** 2
564-
# Shape of probe_int: (n_scan_points, h, w)
565-
probe_int = probe_int.repeat(len(positions_all), 1, 1)
566564

567565
# Stitch probes of all positions on the object buffer
568566
# TODO: allow setting chunk size externally
@@ -571,11 +569,13 @@ def calculate_illumination_map(
571569
common_kwargs={"op": "add"},
572570
chunkable_kwargs={
573571
"positions": positions_all.round().int() + self.pos_origin_coords,
574-
"patches": probe_int,
575572
},
576573
iterated_kwargs={
577574
"image": torch.zeros_like(object_.real).type(torch.get_default_dtype())
578575
},
576+
replicated_kwargs={
577+
"patches": probe_int,
578+
},
579579
chunk_size=64,
580580
)
581581
return probe_sq_map

src/ptychi/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ def chunked_processing(
365365
common_kwargs: dict,
366366
chunkable_kwargs: dict,
367367
iterated_kwargs: dict,
368+
replicated_kwargs: dict = None,
368369
chunk_size: int = 96,
369370
):
370371
"""
@@ -380,6 +381,10 @@ def chunked_processing(
380381
A dictionary of arguments that should be returned by `func`, then passed to `func`
381382
for the next chunk. The order of arguments should be the same as the returns of
382383
`func`.
384+
replicated_kwargs : dict, optional
385+
A dictionary of arguments that should be replicated for each chunk along the
386+
first dimension to match the chunk size. Tensors given here should have a first
387+
dimension of size 1 intended as the batch dimension.
383388
chunk_size : int, optional
384389
The size of each chunk. Default is 96.
385390
@@ -404,6 +409,13 @@ def chunked_processing(
404409
ind_st = ind_end
405410

406411
for kwargs_chunk in chunks_of_chunkable_args:
412+
current_chunk_size = kwargs_chunk[list(kwargs_chunk.keys())[0]].shape[0]
413+
if replicated_kwargs is not None:
414+
replicated_kwargs_chunk = {
415+
key: torch.repeat_interleave(value, current_chunk_size, dim=0)
416+
for key, value in replicated_kwargs.items()
417+
}
418+
kwargs_chunk.update(replicated_kwargs_chunk)
407419
ret = func(**common_kwargs, **kwargs_chunk, **iterated_kwargs)
408420
if isinstance(ret, tuple):
409421
for i, key in enumerate(iterated_kwargs.keys()):

0 commit comments

Comments (0)