|
1 | 1 | """Adapter class for frame caches |
2 | 2 | """ |
| 3 | +import functools |
3 | 4 | import os |
4 | 5 | from threading import Lock |
5 | 6 |
|
@@ -127,54 +128,35 @@ def _load_framelist(self): |
127 | 128 | self._load_framelist_npz() |
128 | 129 |
|
def _load_framelist_fch5(self):
    """Fill ``self._framelist`` from the HDF5 frame-cache file.

    The actual read is delegated to the module-level, memoized
    ``_load_framecache_fch5`` so that several imageseries backed by the
    same file end up sharing one set of csr matrices.
    """
    # Every argument is coerced to a plain builtin (str/int/tuple) before
    # the call; the loader is wrapped in ``lru_cache``, which requires
    # hashable arguments.
    load_args = {
        "filepath": str(self._fname),
        "num_frames": int(self._nframes),
        "shape": tuple(self._shape),
        "dtype": self._dtype,
        "max_workers": int(self._max_workers),
    }
    self._framelist = _load_framecache_fch5(**load_args)
157 | 140 |
|
def _load_framelist_npz(self):
    """Fill ``self._framelist`` from the npz frame-cache file.

    When the adapter was built from a YAML description, the cache path
    stored in ``self._cache`` is used: as-is if absolute, otherwise
    resolved against the directory of ``self._fname``.  Otherwise
    ``self._fname`` itself is the npz file.
    """
    if self._from_yml:
        base_dir = os.path.dirname(self._fname)
        cache = self._cache
        if os.path.isabs(cache):
            filepath = cache
        else:
            filepath = os.path.join(base_dir, cache)
    else:
        filepath = self._fname

    # The loader is memoized: imageseries that point at the same file
    # (with the same frame count, shape and dtype) share the same csr
    # matrices instead of each reading the file again.
    load_args = {
        "filepath": str(filepath),
        "num_frames": int(self._nframes),
        "shape": tuple(self._shape),
        "dtype": self._dtype,
    }
    self._framelist = _load_framecache_npz(**load_args)
178 | 160 |
|
179 | 161 | def get_region(self, frame_idx: int, region: RegionType) -> np.ndarray: |
180 | 162 | self._load_framelist_if_needed() |
@@ -255,3 +237,79 @@ def __setstate__(self, state): |
255 | 237 | self.__dict__.update(state) |
256 | 238 | # initialize lock after un-pickling |
257 | 239 | self._load_framelist_lock = Lock() |
| 240 | + |
| 241 | + |
| 242 | +# This is memoized so that if multiple imageseries are sharing the |
| 243 | +# same file (such as 32 subpanel Eiger), all of the imageseries can |
| 244 | +# share the same sparse matrices. |
@functools.lru_cache(maxsize=2)
def _load_framecache_npz(
    filepath: str,
    num_frames: int,
    shape: tuple[int, int],
    dtype: np.dtype,
) -> list[csr_matrix]:
    """Load every frame of an npz frame cache as a list of csr matrices.

    Frame ``i`` is rebuilt from the archive members ``"{i}_row"``,
    ``"{i}_col"`` and ``"{i}_data"``.

    The function is memoized (``lru_cache`` with two slots), so repeated
    calls with equal arguments return the *same* list object.  Callers
    share that list and its matrices and must treat them as read-only;
    the ``data`` array of each matrix is marked non-writeable to help
    enforce this.  The cache is keyed on the arguments only, so changes
    to the file on disk are not picked up while an entry is cached.

    Parameters
    ----------
    filepath : path of the ``.npz`` archive.
    num_frames : number of frames to read (indices ``0..num_frames-1``).
    shape : shape given to every csr matrix.
    dtype : dtype given to every csr matrix.

    Returns
    -------
    list of ``num_frames`` csr matrices, in frame order.

    Raises
    ------
    KeyError if one of the expected archive members is missing (raised
    by the archive lookup); whatever ``np.load`` raises if the file
    cannot be opened.
    """
    framelist = []

    # ``np.load`` on an npz archive returns a lazily-reading NpzFile that
    # holds the file open; the ``with`` block closes it once all members
    # have been read into memory.
    with np.load(filepath) as arrs:
        for i in range(num_frames):
            row = arrs[f"{i}_row"]
            col = arrs[f"{i}_col"]
            data = arrs[f"{i}_data"]
            frame = csr_matrix((data, (row, col)),
                               shape=shape,
                               dtype=dtype)

            # Make the data unwriteable, so we can be sure it won't be
            # modified by one of the callers sharing this cached result
            frame.data.flags.writeable = False

            framelist.append(frame)

    return framelist
| 269 | + |
| 270 | + |
| 271 | +# This is memoized so that if multiple imageseries are sharing the |
| 272 | +# same file (such as 32 subpanel Eiger), all of the imageseries can |
| 273 | +# share the same sparse matrices. |
@functools.lru_cache(maxsize=2)
def _load_framecache_fch5(
    filepath: str,
    num_frames: int,
    shape: tuple[int, int],
    dtype: np.dtype,
    max_workers: int,
) -> list[csr_matrix]:
    """Load every frame of an HDF5 frame cache as a list of csr matrices.

    The file is expected to hold three datasets: ``"frame_ids"``,
    ``"data"`` and ``"indices"``.  For frame ``i``, entries ``2*i`` and
    ``2*i + 1`` of ``frame_ids`` are the start/stop bounds of that
    frame's slice in both ``data`` and ``indices``.  Column 0 of
    ``indices`` is used as the row index, column 1 as the column index,
    and column 0 of ``data`` as the values.

    The function is memoized (``lru_cache`` with two slots), so repeated
    calls with equal arguments return the *same* list object.  Callers
    share that list and its matrices and must treat them as read-only;
    the ``data`` array of each matrix is marked non-writeable to help
    enforce this.  Note that ``max_workers`` is part of the cache key.

    Parameters
    ----------
    filepath : path of the HDF5 file.
    num_frames : number of frames to read (indices ``0..num_frames-1``).
    shape : shape given to every csr matrix.
    dtype : dtype given to every csr matrix.
    max_workers : thread count handed to ``ThreadPoolExecutor``.

    Returns
    -------
    list of ``num_frames`` csr matrices, in frame order.
    """
    with h5py.File(filepath, "r") as file:
        frame_id = file["frame_ids"]
        data = file["data"]
        indices = file["indices"]

        def read_frame(i: int) -> csr_matrix:
            # Read the slice bounds once and reuse them for both
            # datasets, rather than indexing ``frame_ids`` four times.
            start = frame_id[2 * i]
            stop = frame_id[2 * i + 1]
            frame_data = data[start:stop]
            frame_indices = indices[start:stop]

            row = frame_indices[:, 0]
            col = frame_indices[:, 1]
            mat_data = frame_data[:, 0]
            frame = csr_matrix((mat_data, (row, col)),
                               shape=shape,
                               dtype=dtype)

            # Make the data unwriteable, so we can be sure it won't be
            # modified by one of the callers sharing this cached result
            frame.data.flags.writeable = False
            return frame

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # ``executor.map`` yields results in input order; consuming
            # it with `list()` builds the frame list and also re-raises
            # any exception raised in a worker thread, so it is visible
            # to the user.  This must happen while the file is open.
            return list(executor.map(read_frame, range(num_frames)))
0 commit comments