Commit b6d9118

Merge pull request #39 from steppi/cupy-submodule
ENH:TST Make changes needed to make xsf work as CuPy submodule and add proof of concept CuPy tests.
2 parents 6062500 + 714dec2 commit b6d9118

File tree

6 files changed: 1309 additions & 3 deletions


.github/workflows/gpu_tests.yml

Lines changed: 48 additions & 0 deletions
name: GPU jobs

on: [ push, pull_request ]

permissions:
  contents: read  # to fetch code (actions/checkout)

env:
  CCACHE_DIR: "${{ github.workspace }}/.ccache"

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  cupy_tests:
    name: CuPy GPU
    runs-on: ghcr.io/cirruslabs/ubuntu-runner-amd64-gpu:22.04
    steps:
      - name: Checkout xsf repo
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          submodules: recursive

      - name: Setup compiler cache
        uses: cirruslabs/cache@v4 #caa3ad0624c6c2acd8ba50ad452d1f44bba078bb # v4
        with:
          path: ${{ env.CCACHE_DIR }}
          # Make primary key unique by using `run_id`, this ensures the cache
          # is always saved at the end.
          key: ${{ runner.os }}-gpu-ccache-${{ github.run_id }}
          restore-keys: |
            ${{ runner.os }}-gpu-ccache

      - name: run nvidia-smi
        run: nvidia-smi

      - name: run nvidia-smi --query
        run: nvidia-smi --query

      - uses: prefix-dev/setup-pixi@ba3bb36eb2066252b2363392b7739741bb777659 # v0.8.1
        with:
          pixi-version: v0.39.2
          manifest-path: pixi.toml
          cache: false

      - name: Run CuPy tests
        run: pixi run test-cupy

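The workflow's final step runs `pixi run test-cupy`, the pixi task that drives the new proof-of-concept CuPy tests; the test files themselves are not shown in this excerpt. Below is a rough sketch of what such a test can look like: a tiny kernel that includes an xsf header is compiled through cupy.RawModule (NVRTC under the hood) and checked against SciPy on the host. The include path, the xsf/gamma.h header, the xsf::gamma call, and the tolerance are illustrative assumptions, not the PR's actual test code, and in practice the CCCL/libcu++ headers also need to be on NVRTC's include path.

# Sketch only: compile an xsf header with NVRTC via cupy.RawModule and
# compare against SciPy. Header, include path, and tolerance are assumptions.
import cupy as cp
import numpy as np
from scipy import special

XSF_INCLUDE = "include"  # location of the xsf headers (assumption)

source = r"""
#include <xsf/gamma.h>

extern "C" __global__ void gamma_kernel(const double *x, double *out, int n) {
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n) {
        out[i] = xsf::gamma(x[i]);
    }
}
"""

mod = cp.RawModule(code=source, options=("-std=c++17", f"-I{XSF_INCLUDE}"))
gamma_kernel = mod.get_function("gamma_kernel")


def test_gamma_matches_scipy():
    # Evaluate on the GPU, then compare element-wise against scipy.special.gamma.
    x = cp.linspace(0.5, 10.0, 256, dtype=cp.float64)
    out = cp.empty_like(x)
    n = int(x.size)
    threads = 128
    blocks = (n + threads - 1) // threads
    gamma_kernel((blocks,), (threads,), (x, out, np.int32(n)))
    np.testing.assert_allclose(cp.asnumpy(out), special.gamma(cp.asnumpy(x)), rtol=1e-10)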
include/xsf/config.h

Lines changed: 8 additions & 3 deletions
@@ -108,7 +108,7 @@ XSF_HOST_DEVICE inline bool signbit(double x) { return cuda::std::signbit(x); }
 XSF_HOST_DEVICE inline double hypot(double x, double y) { return cuda::std::hypot(x, y); }
 
 // Fallback to global namespace for functions unsupported on NVRTC
-#ifndef _LIBCUDACXX_COMPILER_NVRTC
+#ifndef __CUDACC_RTC__
 XSF_HOST_DEVICE inline double ceil(double x) { return cuda::std::ceil(x); }
 XSF_HOST_DEVICE inline double floor(double x) { return cuda::std::floor(x); }
 XSF_HOST_DEVICE inline double round(double x) { return cuda::std::round(x); }
@@ -210,8 +210,13 @@ using enable_if = cuda::std::enable_if<Cond, T>;
 template <typename T>
 using decay = cuda::std::decay<T>;
 
-template <typename T>
-using invoke_result = cuda::std::invoke_result<T>;
+template <typename F>
+struct invoke_result {
+    using type = decltype(cuda::std::declval<F>()());
+};
+
+template <typename F>
+using invoke_result_t = typename invoke_result<F>::type;
 
 template <typename T1, typename T2>
 using pair = cuda::std::pair<T1, T2>;

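The guard change matters because CuPy compiles these headers at runtime with NVRTC, which predefines __CUDACC_RTC__, whereas _LIBCUDACXX_COMPILER_NVRTC is an internal libcu++ detail rather than a compiler-provided macro. A small probe using only stock CuPy (a sketch, not code from this PR) shows which branch of such a guard NVRTC takes:

# Sketch: cupy.RawModule compiles with NVRTC, which predefines __CUDACC_RTC__,
# so a guard keyed off that macro reliably detects the runtime-compilation path.
import cupy as cp

probe_src = r"""
extern "C" __global__ void probe(int *out) {
#ifdef __CUDACC_RTC__
    *out = 1;   // runtime compilation (NVRTC), the path CuPy uses
#else
    *out = 0;   // offline nvcc compilation
#endif
}
"""

mod = cp.RawModule(code=probe_src)
out = cp.zeros(1, dtype=cp.int32)
mod.get_function("probe")((1,), (1,), (out,))
print(int(out[0]))  # prints 1 when compiled through CuPy/NVRTC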
0 commit comments
