Skip to content

Commit 10a6691

Browse files
authored
Merge pull request #813 from HEXRD/memoize-framecache-load
Memoize frame cache load
2 parents c7a8c44 + 5301d5a commit 10a6691

File tree

1 file changed

+96
-38
lines changed

1 file changed

+96
-38
lines changed

hexrd/imageseries/load/framecache.py

Lines changed: 96 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Adapter class for frame caches
22
"""
3+
import functools
34
import os
45
from threading import Lock
56

@@ -127,54 +128,35 @@ def _load_framelist(self):
127128
self._load_framelist_npz()
128129

129130
def _load_framelist_fch5(self):
    """Fill ``self._framelist`` from the fch5 file named by ``self._fname``.

    The actual reading is delegated to the memoized module-level loader
    ``_load_framecache_fch5``, so several imageseries that point at the
    same file end up sharing one list of csr matrices.
    """
    # The values are coerced to plain str/int/tuple before being handed to
    # the memoized loader.  Keyword order is kept fixed on purpose:
    # ``functools.lru_cache`` treats differently ordered keywords as
    # distinct calls.
    loader_args = {
        "filepath": str(self._fname),
        "num_frames": int(self._nframes),
        "shape": tuple(self._shape),
        "dtype": self._dtype,
        "max_workers": int(self._max_workers),
    }
    self._framelist = _load_framecache_fch5(**loader_args)
157140

158141
def _load_framelist_npz(self):
    """Fill ``self._framelist`` from an npz frame cache.

    When the imageseries was opened from a yml file, the cache location is
    ``self._cache``; a relative value is resolved against the directory
    containing ``self._fname``.  Otherwise ``self._fname`` itself is the npz
    file.  Reading is delegated to the memoized module-level loader
    ``_load_framecache_npz``, so several imageseries that point at the same
    file end up sharing one list of csr matrices.
    """
    if self._from_yml:
        yml_dir = os.path.dirname(self._fname)
        if os.path.isabs(self._cache):
            npz_path = self._cache
        else:
            npz_path = os.path.join(yml_dir, self._cache)
    else:
        npz_path = self._fname

    # The values are coerced to plain str/int/tuple before being handed to
    # the memoized loader.  Keyword order is kept fixed on purpose:
    # ``functools.lru_cache`` treats differently ordered keywords as
    # distinct calls.
    loader_args = {
        "filepath": str(npz_path),
        "num_frames": int(self._nframes),
        "shape": tuple(self._shape),
        "dtype": self._dtype,
    }
    self._framelist = _load_framecache_npz(**loader_args)
178160

179161
def get_region(self, frame_idx: int, region: RegionType) -> np.ndarray:
180162
self._load_framelist_if_needed()
@@ -255,3 +237,79 @@ def __setstate__(self, state):
255237
self.__dict__.update(state)
256238
# initialize lock after un-pickling
257239
self._load_framelist_lock = Lock()
240+
241+
242+
# This is memoized so that if multiple imageseries are sharing the
243+
# same file (such as 32 subpanel Eiger), all of the imageseries can
244+
# share the same sparse matrices.
245+
@functools.lru_cache(maxsize=2)
def _load_framecache_npz(
    filepath: str,
    num_frames: int,
    shape: tuple[int, int],
    dtype: np.dtype,
) -> list[csr_matrix]:
    """Load all frames of an npz frame cache as sparse matrices.

    The call is memoized (the two most recent argument combinations are
    kept), so repeated calls with the same arguments return the *same* list
    object holding the same csr matrices.  The cache key is built from the
    argument values only; a file that changes on disk under the same path is
    not re-read while its entry is cached.

    Parameters
    ----------
    filepath
        Path of the ``.npz`` archive.
    num_frames
        Number of frames to read.  Frame ``i`` is stored under the keys
        ``"{i}_row"``, ``"{i}_col"`` and ``"{i}_data"``.
    shape
        Shape given to every frame matrix.
    dtype
        dtype given to every frame matrix.

    Returns
    -------
    list of csr_matrix
        One matrix per frame, in frame order.  The ``data`` array of each
        matrix is flagged read-only.

    Raises
    ------
    KeyError
        If one of the expected per-frame keys is missing from the archive.
    """
    framelist = []

    # ``np.load`` on an npz archive returns a lazily-read NpzFile that keeps
    # the underlying file open; the ``with`` block releases the handle as
    # soon as every frame has been pulled into memory.
    with np.load(filepath) as arrs:
        for i in range(num_frames):
            row = arrs[f"{i}_row"]
            col = arrs[f"{i}_col"]
            data = arrs[f"{i}_data"]
            frame = csr_matrix((data, (row, col)),
                               shape=shape,
                               dtype=dtype)

            # Make the data unwriteable, so we can be sure it won't be
            # modified by one of the consumers sharing the cached result.
            frame.data.flags.writeable = False

            framelist.append(frame)

    return framelist
269+
270+
271+
# This is memoized so that if multiple imageseries are sharing the
272+
# same file (such as 32 subpanel Eiger), all of the imageseries can
273+
# share the same sparse matrices.
274+
@functools.lru_cache(maxsize=2)
def _load_framecache_fch5(
    filepath: str,
    num_frames: int,
    shape: tuple[int, int],
    dtype: np.dtype,
    max_workers: int,
) -> list[csr_matrix]:
    """Load all frames of an fch5 (HDF5) frame cache as sparse matrices.

    The call is memoized (the two most recent argument combinations are
    kept), so repeated calls with the same arguments return the *same* list
    object holding the same csr matrices.  The cache key is built from the
    argument values only; a file that changes on disk under the same path is
    not re-read while its entry is cached.

    Parameters
    ----------
    filepath
        Path of the HDF5 file.  It must contain the datasets ``frame_ids``,
        ``data`` and ``indices``.
    num_frames
        Number of frames to read.
    shape
        Shape given to every frame matrix.
    dtype
        dtype given to every frame matrix.
    max_workers
        Passed to ``ThreadPoolExecutor`` as the worker-thread limit.

    Returns
    -------
    list of csr_matrix
        One matrix per frame, in frame order.  The ``data`` array of each
        matrix is flagged read-only.
    """
    with h5py.File(filepath, "r") as file:
        frame_id = file["frame_ids"]
        data = file["data"]
        indices = file["indices"]

        def read_frame(i):
            # Entries 2*i and 2*i+1 of ``frame_ids`` bound the slice of
            # ``data``/``indices`` that belongs to frame i.  Read them once
            # and reuse them for both datasets.
            start = frame_id[2 * i]
            stop = frame_id[2 * i + 1]

            frame_data = data[start:stop]
            frame_indices = indices[start:stop]

            # Column 0 of ``indices`` is used as the row index and column 1
            # as the column index; the values come from column 0 of ``data``.
            row = frame_indices[:, 0]
            col = frame_indices[:, 1]
            mat_data = frame_data[:, 0]
            frame = csr_matrix((mat_data, (row, col)),
                               shape=shape,
                               dtype=dtype)

            # Make the data unwriteable, so we can be sure it won't be
            # modified by one of the consumers sharing the cached result.
            frame.data.flags.writeable = False

            return frame

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # ``executor.map`` yields results in input order, and consuming
            # it via ``list()`` re-raises any exception from a worker thread
            # so that it is visible to the user.
            return list(executor.map(read_frame, range(num_frames)))

0 commit comments

Comments
 (0)