rapidsai · rapids-bot · Aug 27, 2025 · May 30, 2025 · Aug 4, 2025 · Aug 4, 2025
@@ -50,6 +50,7 @@ dependencies:
 - pydata-sphinx-theme!=0.14.2
 - pylibraft==25.10.*,>=0.0.0a0
 - pynndescent
+- pynvml>=12.0.0,<13.0.0a0
 - pytest
 - pytest-benchmark
 - pytest-cases

@@ -50,6 +50,7 @@ dependencies:
 - pydata-sphinx-theme!=0.14.2
 - pylibraft==25.10.*,>=0.0.0a0
 - pynndescent
+- pynvml>=12.0.0,<13.0.0a0
 - pytest
 - pytest-benchmark
 - pytest-cases

@@ -440,6 +440,7 @@ dependencies:
           - nltk
           # upstream sklearn docstring tests require numpydoc<1.9
           - numpydoc<1.9
+          - pynvml>=12.0.0,<13.0.0a0
           - pyyaml
           - pytest
           - pytest-benchmark

@@ -135,6 +135,7 @@ test = [
     "nltk",
     "numpydoc<1.9",
     "pynndescent",
+    "pynvml>=12.0.0,<13.0.0a0",
     "pytest",
     "pytest-benchmark",
     "pytest-cases",

@@ -15,7 +15,6 @@
 #
 
 import os
-import subprocess
 from datetime import timedelta
 from math import ceil
 from ssl import create_default_context
@@ -27,6 +26,7 @@
 import hypothesis
 import numpy as np
 import pandas as pd
+import pynvml
 import pytest
 from sklearn import datasets
 from sklearn.datasets import fetch_20newsgroups, fetch_california_housing
@@ -275,20 +275,72 @@ def pytest_pyfunc_call(pyfuncitem):
         pytest.skip("Test requires cudf.pandas accelerator")
 
 
-def _get_gpu_memory():
-    """Get the total GPU memory in GB."""
-    bash_command = "nvidia-smi --query-gpu=memory.total --format=csv"
-    output = subprocess.check_output(bash_command, shell=True).decode("utf-8")
-    lines = output.split("\n")
-    lines.pop(0)
-    gpus_memory = []
-    for line in lines:
-        tokens = line.split(" ")
-        if len(tokens) > 1:
-            gpus_memory.append(int(tokens[0]))
-    gpus_memory.sort()
-    max_gpu_memory = ceil(gpus_memory[-1] / 1024)
-    return max_gpu_memory
+def _get_pynvml_device_handle(device_id=0):
+    """Get GPU handle from device index or UUID.
+
+    Parameters
+    ----------
+    device_id: int or str
+        Device index (an int or a numeric string) or the device UUID.
+
+    Raises
+    ------
+    ValueError
+        If acquiring the device handle for the device specified failed.
+    pynvml.NVMLError
+        If any NVML error occurred while initializing.
+
+    Returns
+    -------
+    A pynvml device handle; for a MIG instance UUID, the parent device's.
+
+    Examples
+    --------
+    >>> _get_pynvml_device_handle(device_id=0)
+
+    >>> _get_pynvml_device_handle(device_id="GPU-9fb42d6f-7d6b-368f-f79c-3c3e784c93f6")
+    """
+    pynvml.nvmlInit()
+
+    try:
+        if device_id and not str(device_id).isnumeric():
+            # This means device_id is UUID.
+            # This works for both MIG and non-MIG device UUIDs.
+            handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(device_id))
+            if pynvml.nvmlDeviceIsMigDeviceHandle(handle):
+                # Additionally get parent device handle
+                # if the device itself is a MIG instance
+                handle = pynvml.nvmlDeviceGetDeviceHandleFromMigDeviceHandle(
+                    handle
+                )
+        else:
+            handle = pynvml.nvmlDeviceGetHandleByIndex(int(device_id))
+        return handle
+    except (pynvml.NVMLError, ValueError) as err:
+        raise ValueError(f"Invalid device index or UUID: {device_id}") from err
+
+
+def _get_gpu_memory(device_index=0):
+    """Return total memory of CUDA device with index or with device identifier UUID.
+
+    Parameters
+    ----------
+    device_index: int or str
+        The index or UUID of the device from which to obtain the total memory.
+
+    Returns
+    -------
+    The total memory of the device in GiB (2**30 bytes), rounded up to a whole
+    number, or ``None`` when NVML reports the memory query as not supported
+    (presumably devices without dedicated memory, e.g. SoC devices).
+    """
+    handle = _get_pynvml_device_handle(device_index)
+
+    try:
+        # NVML reports bytes; convert to GiB and round up to a whole number
+        return ceil(pynvml.nvmlDeviceGetMemoryInfo(handle).total / 2**30)
+    except pynvml.NVMLError_NotSupported:
+        return None
 
 
 # =============================================================================

@@ -28,12 +28,14 @@
 )
 @pytest.mark.parametrize("input_type", ["dataframe", "array"])
 def test_pca_fit(data_info, input_type, client):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
 
     nrows, ncols, n_parts = data_info
-    if nrows == int(9e6) and pytest.max_gpu_memory < 48:
+    if nrows == int(9e6) and max_gpu_memory < 48:
         if pytest.adapt_stress_test:
-            nrows = nrows * pytest.max_gpu_memory // 256
-            ncols = ncols * pytest.max_gpu_memory // 256
+            nrows = nrows * max_gpu_memory // 256
+            ncols = ncols * max_gpu_memory // 256
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."

@@ -62,12 +62,15 @@ def test_dbscan(
     out_dtype,
     algorithm,
 ):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
     if algorithm == "rbc":
         if datatype == np.float64 or out_dtype in ["int32", np.int32]:
             pytest.skip("RBC does not support float64 dtype or int32 labels")
-    if nrows == 500000 and pytest.max_gpu_memory < 32:
+    if nrows == 500000 and max_gpu_memory < 32:
         if pytest.adapt_stress_test:
-            nrows = nrows * pytest.max_gpu_memory // 32
+            nrows = nrows * max_gpu_memory // 32
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test. "
@@ -213,9 +216,12 @@ def test_dbscan_cosine(nrows, max_mbytes_per_batch, out_dtype):
 # Vary the eps to get a range of core point counts
 @pytest.mark.parametrize("eps", [0.05, 0.1, 0.5])
 def test_dbscan_sklearn_comparison(name, nrows, eps):
-    if nrows == 500000 and name == "blobs" and pytest.max_gpu_memory < 32:
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
+    if nrows == 500000 and name == "blobs" and max_gpu_memory < 32:
         if pytest.adapt_stress_test:
-            nrows = nrows * pytest.max_gpu_memory // 32
+            nrows = nrows * max_gpu_memory // 32
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."

@@ -119,10 +119,13 @@ def test_lars_model(datatype, nrows, column_info, precompute):
 )
 @pytest.mark.parametrize("precompute", [True, False])
 def test_lars_collinear(datatype, nrows, column_info, precompute):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
     ncols, n_info = column_info
-    if nrows == 500000 and ncols == 1000 and pytest.max_gpu_memory < 32:
+    if nrows == 500000 and ncols == 1000 and max_gpu_memory < 32:
         if pytest.adapt_stress_test:
-            nrows = nrows * pytest.max_gpu_memory // 32
+            nrows = nrows * max_gpu_memory // 32
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."

@@ -45,13 +45,12 @@
 )
 def make_dataset(request):
     nrows, ncols, n_info, datatype = request.param
-    if (
-        nrows == 500000
-        and datatype == np.float64
-        and pytest.max_gpu_memory < 32
-    ):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
+    if nrows == 500000 and datatype == np.float64 and max_gpu_memory < 32:
         if pytest.adapt_stress_test:
-            nrows = nrows * pytest.max_gpu_memory // 32
+            nrows = nrows * max_gpu_memory // 32
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."

@@ -140,10 +140,13 @@ def test_pca_defaults(n_samples, n_features, sparse):
     "name", [unit_param(None), quality_param("iris"), stress_param("blobs")]
 )
 def test_pca_fit_then_transform(datatype, input_type, name, use_handle):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
     blobs_n_samples = 500000
-    if name == "blobs" and pytest.max_gpu_memory < 32:
+    if name == "blobs" and max_gpu_memory < 32:
         if pytest.adapt_stress_test:
-            blobs_n_samples = int(blobs_n_samples * pytest.max_gpu_memory / 32)
+            blobs_n_samples = int(blobs_n_samples * max_gpu_memory / 32)
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."
@@ -193,11 +196,14 @@ def test_pca_fit_then_transform(datatype, input_type, name, use_handle):
     "name", [unit_param(None), quality_param("iris"), stress_param("blobs")]
 )
 def test_pca_fit_transform(datatype, input_type, name, use_handle):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
     blobs_n_samples = 500000
 
-    if name == "blobs" and pytest.max_gpu_memory < 32:
+    if name == "blobs" and max_gpu_memory < 32:
         if pytest.adapt_stress_test:
-            blobs_n_samples = int(blobs_n_samples * pytest.max_gpu_memory / 32)
+            blobs_n_samples = int(blobs_n_samples * max_gpu_memory / 32)
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."
@@ -273,9 +279,12 @@ def test_pca_inverse_transform(datatype, input_type, name, use_handle, nrows):
 @pytest.mark.parametrize("return_sparse", [True, False])
 @pytest.mark.parametrize("cupy_input", [True, False])
 def test_sparse_pca_inputs(nrows, ncols, whiten, return_sparse, cupy_input):
-    if ncols == 20000 and pytest.max_gpu_memory < 48:
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
+    if ncols == 20000 and max_gpu_memory < 48:
         if pytest.adapt_stress_test:
-            ncols = int(ncols * pytest.max_gpu_memory / 48)
+            ncols = int(ncols * max_gpu_memory / 48)
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."

@@ -224,16 +224,19 @@ def assert_model(pickled_model, X_test):
 )
 @pytest.mark.parametrize("fit_intercept", [True, False])
 def test_regressor_pickle(tmpdir, datatype, keys, data_size, fit_intercept):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
     if (
         data_size[0] == 500000
         and datatype == np.float64
         and ("LogisticRegression" in keys or "Ridge" in keys)
-        and pytest.max_gpu_memory < 32
+        and max_gpu_memory < 32
     ):
         if pytest.adapt_stress_test:
-            data_size[0] = data_size[0] * pytest.max_gpu_memory // 640
-            data_size[1] = data_size[1] * pytest.max_gpu_memory // 640
-            data_size[2] = data_size[2] * pytest.max_gpu_memory // 640
+            data_size[0] = data_size[0] * max_gpu_memory // 640
+            data_size[1] = data_size[1] * max_gpu_memory // 640
+            data_size[2] = data_size[2] * max_gpu_memory // 640
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."
@@ -437,13 +440,16 @@ def test_unfit_clone(model_name):
     [unit_param([500, 20, 10, 5]), stress_param([500000, 1000, 500, 50])],
 )
 def test_neighbors_pickle(tmpdir, datatype, keys, data_info):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
     if (
         data_info[0] == 500000
-        and pytest.max_gpu_memory < 32
+        and max_gpu_memory < 32
         and ("KNeighborsClassifier" in keys or "KNeighborsRegressor" in keys)
     ):
         if pytest.adapt_stress_test:
-            data_info[0] = data_info[0] * pytest.max_gpu_memory // 32
+            data_info[0] = data_info[0] * max_gpu_memory // 32
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."
@@ -486,13 +492,16 @@ def assert_model(pickled_model, X_test):
 )
 @pytest.mark.parametrize("keys", k_neighbors_models.keys())
 def test_k_neighbors_classifier_pickle(tmpdir, datatype, data_info, keys):
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
     if (
         data_info[0] == 500000
         and "NearestNeighbors" in keys
-        and pytest.max_gpu_memory < 32
+        and max_gpu_memory < 32
     ):
         if pytest.adapt_stress_test:
-            data_info[0] = data_info[0] * pytest.max_gpu_memory // 32
+            data_info[0] = data_info[0] * max_gpu_memory // 32
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."
@@ -561,9 +570,12 @@ def assert_model(loaded_model, X):
     "data_size", [unit_param([500, 20, 10]), stress_param([500000, 1000, 500])]
 )
 def test_dbscan_pickle(tmpdir, datatype, keys, data_size):
-    if data_size[0] == 500000 and pytest.max_gpu_memory < 32:
+    # Assume at least 4GB memory
+    max_gpu_memory = pytest.max_gpu_memory or 4
+
+    if data_size[0] == 500000 and max_gpu_memory < 32:
         if pytest.adapt_stress_test:
-            data_size[0] = data_size[0] * pytest.max_gpu_memory // 32
+            data_size[0] = data_size[0] * max_gpu_memory // 32
         else:
             pytest.skip(
                 "Insufficient GPU memory for this test."