From 87a53f359597a87d7204155ab03479f915d739cc Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Fri, 27 Jun 2025 23:29:04 -0400 Subject: [PATCH 1/2] Use csr_array instead of csr_matrix SciPy is moving away from `csr_matrix` and toward using `csr_array` for everything, similar to how NumPy moved away from `np.matrix` and started using `np.ndarray` for everything. We ought to use `csr_array` instead. Signed-off-by: Patrick Avery --- hexrd/imageseries/load/framecache.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hexrd/imageseries/load/framecache.py b/hexrd/imageseries/load/framecache.py index 95003ee1..c1431d6f 100644 --- a/hexrd/imageseries/load/framecache.py +++ b/hexrd/imageseries/load/framecache.py @@ -7,7 +7,7 @@ import h5py import numpy as np import yaml -from scipy.sparse import csr_matrix +from scipy.sparse import csr_array # FIXME: figure out if there is a public way to import this function from scipy.sparse._compressed import csr_sample_values @@ -264,7 +264,7 @@ def _load_framecache_npz( num_frames: int, shape: tuple[int, int], dtype: np.dtype, -) -> list[csr_matrix]: +) -> list[csr_array]: framelist = [] arrs = np.load(filepath) @@ -272,7 +272,7 @@ def _load_framecache_npz( row = arrs[f"{i}_row"] col = arrs[f"{i}_col"] data = arrs[f"{i}_data"] - frame = csr_matrix((data, (row, col)), + frame = csr_array((data, (row, col)), shape=shape, dtype=dtype) @@ -294,7 +294,7 @@ def _load_framecache_fch5( shape: tuple[int, int], dtype: np.dtype, max_workers: int, -) -> list[csr_matrix]: +) -> list[csr_array]: framelist = [None] * num_frames @@ -309,7 +309,7 @@ def read_list_arrays_method_thread(i): row = frame_indices[:, 0] col = frame_indices[:, 1] mat_data = frame_data[:, 0] - frame = csr_matrix((mat_data, (row, col)), + frame = csr_array((mat_data, (row, col)), shape=shape, dtype=dtype) @@ -332,7 +332,7 @@ def read_list_arrays_method_thread(i): def _extract_sparse_values( - mat: csr_matrix, + mat: csr_array, row: np.ndarray, col: 
np.ndarray, ) -> np.ndarray: From e830e1ec59d96afa2335399a39d8f6a7c23e85fa Mon Sep 17 00:00:00 2001 From: Patrick Avery Date: Fri, 27 Jun 2025 23:33:03 -0400 Subject: [PATCH 2/2] Avoid using csr_sample_values Instead we should ravel and reshape the `csr_array`. See discussion [here](https://github.com/scipy/scipy/issues/23211). Signed-off-by: Patrick Avery --- hexrd/imageseries/load/framecache.py | 32 +++------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/hexrd/imageseries/load/framecache.py b/hexrd/imageseries/load/framecache.py index c1431d6f..7c63c6f6 100644 --- a/hexrd/imageseries/load/framecache.py +++ b/hexrd/imageseries/load/framecache.py @@ -9,9 +9,6 @@ import yaml from scipy.sparse import csr_array -# FIXME: figure out if there is a public way to import this function -from scipy.sparse._compressed import csr_sample_values - from . import ImageSeriesAdapter, RegionType from ..imageseriesiter import ImageSeriesIterator from .metadata import yamlmeta @@ -220,7 +217,9 @@ def __getitem__(self, key): if len(key) == 3: # This is definitely used frequently and needs to # be performant. - return _extract_sparse_values(mat, key[1], key[2]) + ind1 = key[1] + ind2 = key[2] + return mat[ind1.ravel(), ind2.ravel()].reshape(ind1.shape) elif len(key) == 2: # Not sure if this will actually be used. return mat[key[1]].toarray() @@ -329,28 +328,3 @@ def read_list_arrays_method_thread(i): range(num_frames))) return framelist - - -def _extract_sparse_values( - mat: csr_array, - row: np.ndarray, - col: np.ndarray, -) -> np.ndarray: - # This was first copied from here: https://github.com/scipy/scipy/blob/a465e2ce014c1b20b0e4b949e46361e5c2fb727e/scipy/sparse/_compressed.py#L556-L569 - # And then subsequently modified to return the internal `val` array. - - # It uses the `csr_sample_values()` function to extract values. 
This is - # excellent because it skips the creation of a new sparse array (and - # subsequent conversion to a numpy array *again*). It provides a nearly - # 10% performance boost for `pull_spots()`. - idx_dtype = mat.indices.dtype - M, N = mat._swap(mat.shape) - major, minor = mat._swap((row, col)) - major = np.asarray(major, dtype=idx_dtype) - minor = np.asarray(minor, dtype=idx_dtype) - - val = np.empty(major.size, dtype=mat.dtype) - csr_sample_values(M, N, mat.indptr, mat.indices, mat.data, - major.size, major.ravel(), minor.ravel(), val) - - return val.reshape(major.shape)