Merge pull request #833 from HEXRD/polar-view-speedups

psavery · web-flow · commit cf4585994fc2 · 2025-09-03T13:39:34.000-05:00
Implement several speedups for polar view generation
diff --git a/hexrd/instrument/detector.py b/hexrd/instrument/detector.py
@@ -7,6 +7,7 @@
     COATING_DEFAULT, FILTER_DEFAULTS, PHOSPHOR_DEFAULT
 )
 from hexrd.instrument.physics_package import AbstractPhysicsPackage
+import numba
 import numpy as np
 
 from hexrd import constants as ct
@@ -1111,9 +1112,12 @@ def interpolate_nearest(self, xy, img, pad_with_nans=True):
         int_xy[on_panel] = int_vals
         return int_xy
 
-    def interpolate_bilinear(self, xy, img, pad_with_nans=True,
-                             clip_to_panel=True,
-                             on_panel: Optional[np.ndarray] = None):
+    def interpolate_bilinear(
+        self,
+        xy: np.ndarray,
+        img: np.ndarray,
+        pad_with_nans: bool = True,
+    ):
         """
         Interpolate an image array at the specified cartesian points.
 
@@ -1123,13 +1127,10 @@ def interpolate_bilinear(self, xy, img, pad_with_nans=True,
             Array of cartesian coordinates in the image plane at which
             to evaluate intensity.
         img : array_like
-            2-dimensional image array.
+            2-dimensional image array. The shape must match (rows, cols).
         pad_with_nans : bool, optional
             Toggle for assigning NaN to points that fall off the detector.
             The default is True.
-        on_panel : np.ndarray, optional
-            If you want to skip clip_to_panel() for performance reasons,
-            just provide an array of which pixels are on the panel.
 
         Returns
         -------
@@ -1141,28 +1142,30 @@ def interpolate_bilinear(self, xy, img, pad_with_nans=True,
         -----
         TODO: revisit normalization in here?
         """
+        fill_value = np.nan if pad_with_nans else 0
+        int_xy = np.full(len(xy), fill_value, dtype=float)
 
-        is_2d = img.ndim == 2
-        right_shape = img.shape[0] == self.rows and img.shape[1] == self.cols
-        assert (
-            is_2d and right_shape
-        ), "input image must be 2-d with shape (%d, %d)" % (
-            self.rows,
-            self.cols,
-        )
+        # clip away points too close to or off the detector edges
+        xy_clip, on_panel = self.clip_to_panel(xy, buffer_edges=True)
 
-        # initialize output with nans
-        if pad_with_nans:
-            int_xy = np.nan * np.ones(len(xy))
-        else:
-            int_xy = np.zeros(len(xy))
+        # Generate the interpolation dict
+        interp_dict = self._generate_bilinear_interp_dict(xy_clip)
 
-        if on_panel is None:
-            # clip away points too close to or off the edges of the detector
-            xy_clip, on_panel = self.clip_to_panel(xy, buffer_edges=True)
-        else:
-            xy_clip = xy[on_panel]
+        # Set the output and return
+        int_xy[on_panel] = _interpolate_bilinear(img, **interp_dict)
+        return int_xy
 
+    def _generate_bilinear_interp_dict(
+        self,
+        xy_clip: np.ndarray,
+    ) -> dict[str, np.ndarray]:
+        """Compute bilinear interpolation multipliers and indices for the panel
+
+        If you are going to be using the same panel settings and performing
+        interpolation on multiple images, it is advised to run this beforehand
+        to precompute the interpolation parameters, so you can use them
+        repeatedly.
+        """
         # grab fractional pixel indices of clipped points
         ij_frac = self.cartToPixel(xy_clip)
 
@@ -1182,20 +1185,24 @@ def interpolate_bilinear(self, xy, img, pad_with_nans=True,
         j_ceil = j_floor + 1
         j_ceil_img = _fix_indices(j_ceil, 0, self.cols - 1)
 
-        # first interpolate at top/bottom rows
-        row_floor_int = (j_ceil - ij_frac[:, 1]) * img[
-            i_floor_img, j_floor_img
-        ] + (ij_frac[:, 1] - j_floor) * img[i_floor_img, j_ceil_img]
-        row_ceil_int = (j_ceil - ij_frac[:, 1]) * img[
-            i_ceil_img, j_floor_img
-        ] + (ij_frac[:, 1] - j_floor) * img[i_ceil_img, j_ceil_img]
-
-        # next interpolate across cols
-        int_vals = (i_ceil - ij_frac[:, 0]) * row_floor_int + (
-            ij_frac[:, 0] - i_floor
-        ) * row_ceil_int
-        int_xy[on_panel] = int_vals
-        return int_xy
+        # Compute differences between raw coordinates to use for interpolation
+        j_ceil_sub = j_ceil - ij_frac[:, 1]
+        j_floor_sub = ij_frac[:, 1] - j_floor
+        i_ceil_sub = i_ceil - ij_frac[:, 0]
+        i_floor_sub = ij_frac[:, 0] - i_floor
+
+        return {
+            # Compute interpolation multipliers for every pixel
+            'cc': j_ceil_sub * i_ceil_sub,
+            'fc': j_floor_sub * i_ceil_sub,
+            'cf': j_ceil_sub * i_floor_sub,
+            'ff': j_floor_sub * i_floor_sub,
+            # Store needed pixel indices
+            'i_floor_img': i_floor_img,
+            'j_floor_img': j_floor_img,
+            'i_ceil_img': i_ceil_img,
+            'j_ceil_img': j_ceil_img,
+        }
 
     def make_powder_rings(
         self,
@@ -2100,3 +2107,63 @@ def _row_edge_vec(rows, pixel_size_row):
 
 def _col_edge_vec(cols, pixel_size_col):
     return pixel_size_col * (np.arange(cols + 1) - 0.5 * cols)
+
+
+@numba.njit(nogil=True, cache=True)
+def _interpolate_bilinear(
+    img: np.ndarray,
+    cc: np.ndarray,
+    fc: np.ndarray,
+    cf: np.ndarray,
+    ff: np.ndarray,
+    i_floor_img: np.ndarray,
+    j_floor_img: np.ndarray,
+    i_ceil_img: np.ndarray,
+    j_ceil_img: np.ndarray,
+) -> np.ndarray:
+    """Bilinearly interpolate `img` at precomputed points.
+
+    `cc`, `fc`, `cf` and `ff` are the weights of the four corner pixels
+    selected by the `i_*_img`/`j_*_img` index arrays; all are 1-D with
+    one entry per point. Returns one float64 value per point.
+    """
+    # The math is faster and uses the GIL less (which is more
+    # multi-threading friendly) when we run this code in numba.
+    n_points = i_floor_img.shape[0]
+    # Accumulate in float64, not img.dtype: an integer buffer would
+    # truncate the fractional part of every interpolated value.
+    result = np.zeros(n_points, dtype=np.float64)
+    on_panel_idx = np.arange(n_points)
+    _interpolate_bilinear_in_place(
+        img, cc, fc, cf, ff, i_floor_img, j_floor_img, i_ceil_img,
+        j_ceil_img, on_panel_idx, result,
+    )
+    return result
+
+
+@numba.njit(nogil=True, cache=True)
+def _interpolate_bilinear_in_place(
+    img: np.ndarray,
+    cc: np.ndarray,
+    fc: np.ndarray,
+    cf: np.ndarray,
+    ff: np.ndarray,
+    i_floor_img: np.ndarray,
+    j_floor_img: np.ndarray,
+    i_ceil_img: np.ndarray,
+    j_ceil_img: np.ndarray,
+    on_panel_idx: np.ndarray,
+    output_img: np.ndarray,
+):
+    """Add the bilinear interpolation of `img` at each point to `output_img`.
+
+    Point k is accumulated into output_img[on_panel_idx[k]]. Compiled with
+    numba (nogil) for speed; writing in place avoids intermediary arrays.
+    """
+    for k in range(on_panel_idx.shape[0]):
+        r_lo, r_hi = i_floor_img[k], i_ceil_img[k]
+        c_lo, c_hi = j_floor_img[k], j_ceil_img[k]
+        val = cc[k] * img[r_lo, c_lo] + fc[k] * img[r_lo, c_hi]
+        val += cf[k] * img[r_hi, c_lo]
+        val += ff[k] * img[r_hi, c_hi]
+        output_img[on_panel_idx[k]] += val
diff --git a/hexrd/projections/polar.py b/hexrd/projections/polar.py
@@ -1,6 +1,7 @@
 import numpy as np
 
 from hexrd import constants
+from hexrd.instrument.detector import _interpolate_bilinear_in_place
 from hexrd.material.crystallography import PlaneData
 from hexrd.xrdutil.utils import (
     _project_on_detector_cylinder,
@@ -77,13 +78,15 @@ def __init__(self, plane_data, instrument,
         self._instrument = instrument
 
         self._coordinate_mapping = None
+        self._nan_mask = None
         self._cache_coordinate_map = cache_coordinate_map
         if cache_coordinate_map:
             # It is important to generate the cached map now, rather than
             # later, because this object might be sent to other processes
             # for parallelization, and it will be faster if the mapping
             # is already generated.
             self._coordinate_mapping = self._generate_coordinate_mapping()
+            self._nan_mask = self._generate_nan_mask(self._coordinate_mapping)
 
     @property
     def instrument(self):
@@ -261,13 +264,21 @@ def warp_image(self, image_dict, pad_with_nans=False,
         if self.cache_coordinate_map:
             # The mapping should have already been generated.
             mapping = self._coordinate_mapping
+            nan_mask = self._nan_mask
         else:
             # Otherwise, we must generate it every time
             mapping = self._generate_coordinate_mapping()
+            # FIXME: this performs a bilinear interpolation
+            # each time. Maybe it doesn't matter that much
+            # since the interpolation is very fast now, but
+            # it'd be nice if we could figure out another
+            # way to do it.
+            nan_mask = self._generate_nan_mask(mapping)
 
         return self._warp_image_from_coordinate_map(
             image_dict,
             mapping,
+            nan_mask,
             pad_with_nans=pad_with_nans,
             do_interpolation=do_interpolation,
         )
@@ -312,66 +323,104 @@ def _generate_coordinate_mapping(self) -> dict[str, dict[str, np.ndarray]]:
             xypts[on_plane, :] = valid_xys
 
             _, on_panel = panel.clip_to_panel(xypts, buffer_edges=True)
+            on_panel_idx = np.where(on_panel)[0]
+            xy_clip = xypts[on_panel_idx]
+
+            bilinear_interp_dict = panel._generate_bilinear_interp_dict(
+                xy_clip,
+            )
 
             mapping[detector_id] = {
                 'xypts': xypts,
-                'on_panel': on_panel,
+                'on_panel_idx': on_panel_idx,
+                'bilinear_interp_dict': bilinear_interp_dict,
             }
 
         return mapping
 
-    def _warp_image_from_coordinate_map(
-            self,
-            image_dict: dict[str, np.ndarray],
-            coordinate_map: dict[str, dict[str, np.ndarray]],
-            pad_with_nans: bool = False,
-            do_interpolation=True) -> np.ma.MaskedArray:
-
-        panel_buffer_fill_value = np.nan
-        img_dict = dict.fromkeys(self.detectors)
-        nan_mask = None
-        for detector_id, panel in self.detectors.items():
-            # Make a copy since we may modify
-            img = image_dict[detector_id].copy()
+    def _generate_nan_mask(
+        self,
+        coordinate_map: dict[str, dict[str, np.ndarray]],
+    ) -> np.ndarray:
+        """Generate the nan mask
 
-            # Before warping, mask out any pixels that are invalid,
-            # so that they won't affect the results.
+        This saves time during repeated calls to warp_image(),
+        since the nan mask should stay the same and not change.
+        """
+        mapping = coordinate_map
+        # Generate the nan mask that we will use
+        nan_mask = np.ones(self.shape, dtype=bool).flatten()
+        for detector_id, panel in self.detectors.items():
+            on_panel_idx = mapping[detector_id]['on_panel_idx']
+            xypts = mapping[detector_id]['xypts']
+            interp_dict = mapping[detector_id]['bilinear_interp_dict']
+
+            # To reproduce old behavior, perform a bilinear
+            # interpolation so that if any point has neighboring
+            # pixels that are nan, that point will also be excluded.
+            dummy_img = np.zeros(panel.shape)
             buffer = panel_buffer_as_2d_array(panel)
-            if (np.issubdtype(type(panel_buffer_fill_value), np.floating) and
-                    not np.issubdtype(img.dtype, np.floating)):
-                # Convert to float. This is especially important
-                # for nan, since it is a float...
-                img = img.astype(float)
+            dummy_img[~buffer] = np.nan
+
+            output = np.full(len(xypts), np.nan)
+            output[on_panel_idx] = 0
+            _interpolate_bilinear_in_place(
+                dummy_img,
+                **interp_dict,
+                on_panel_idx=on_panel_idx,
+                output_img=output,
+            )
 
-            img[~buffer] = panel_buffer_fill_value
+            nan_mask[~np.isnan(output)] = False
 
-            xypts = coordinate_map[detector_id]['xypts']
-            on_panel = coordinate_map[detector_id]['on_panel']
+        return nan_mask.reshape(self.shape)
+
+    def _warp_image_from_coordinate_map(
+        self,
+        image_dict: dict[str, np.ndarray],
+        coordinate_map: dict[str, dict[str, np.ndarray]],
+        nan_mask: np.ndarray,
+        pad_with_nans: bool = False,
+        do_interpolation=True,
+    ) -> np.ma.MaskedArray:
+        first_det = next(iter(self.detectors))
+        # This is a flat image. We'll reshape at the end.
+        summed_img = np.zeros(len(coordinate_map[first_det]['xypts']))
+        for detector_id, panel in self.detectors.items():
+            img = image_dict[detector_id]
+            panel_map = coordinate_map[detector_id]
+
+            xypts = panel_map['xypts']
+            on_panel_idx = panel_map['on_panel_idx']
+            interp_dict = panel_map['bilinear_interp_dict']
 
             if do_interpolation:
-                this_img = panel.interpolate_bilinear(
-                    xypts, img,
-                    pad_with_nans=pad_with_nans,
-                    on_panel=on_panel).reshape(self.shape)
-            else:
-                this_img = panel.interpolate_nearest(
-                    xypts, img,
-                    pad_with_nans=pad_with_nans).reshape(self.shape)
-
-            # It is faster to keep track of the global nans like this
-            # rather than the previous way we were doing it...
-            img_nans = np.isnan(this_img)
-            if nan_mask is None:
-                nan_mask = img_nans
+                # It's faster if we do _interpolate_bilinear ourselves,
+                # since we already have all appropriate options set up.
+                _interpolate_bilinear_in_place(
+                    img,
+                    **interp_dict,
+                    on_panel_idx=on_panel_idx,
+                    output_img=summed_img,
+                )
             else:
-                nan_mask = np.logical_and(img_nans, nan_mask)
+                summed_img += panel.interpolate_nearest(
+                    xypts,
+                    img,
+                    # DON'T pad with nans, so we can sum images together
+                    # correctly. We'll pad with nans later.
+                    pad_with_nans=False,
+                )
+
+        # Now reshape the image to the appropriate shape
+        output_img = summed_img.reshape(self.shape)
 
-            this_img[img_nans] = 0
-            img_dict[detector_id] = this_img
+        if pad_with_nans:
+            # We pad with nans manually here
+            output_img[nan_mask] = np.nan
 
-        summed_img = np.sum(list(img_dict.values()), axis=0)
         return np.ma.masked_array(
-            data=summed_img, mask=nan_mask, fill_value=0.
+            data=output_img, mask=nan_mask, fill_value=0.
         )
 
     def tth_to_pixel(self, tth):