Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ dependencies:
- pydata-sphinx-theme!=0.14.2
- pylibraft==25.10.*,>=0.0.0a0
- pynndescent
- pynvml>=12.0.0,<13.0.0a0
- pytest
- pytest-benchmark
- pytest-cases
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ dependencies:
- pydata-sphinx-theme!=0.14.2
- pylibraft==25.10.*,>=0.0.0a0
- pynndescent
- pynvml>=12.0.0,<13.0.0a0
- pytest
- pytest-benchmark
- pytest-cases
Expand Down
1 change: 1 addition & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ dependencies:
- nltk
# upstream sklearn docstring tests require numpydoc<1.9
- numpydoc<1.9
- pynvml>=12.0.0,<13.0.0a0
- pyyaml
- pytest
- pytest-benchmark
Expand Down
1 change: 1 addition & 0 deletions python/cuml/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ test = [
"nltk",
"numpydoc<1.9",
"pynndescent",
"pynvml>=12.0.0,<13.0.0a0",
"pytest",
"pytest-benchmark",
"pytest-cases",
Expand Down
82 changes: 67 additions & 15 deletions python/cuml/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#

import os
import subprocess
from datetime import timedelta
from math import ceil
from ssl import create_default_context
Expand All @@ -27,6 +26,7 @@
import hypothesis
import numpy as np
import pandas as pd
import pynvml
import pytest
from sklearn import datasets
from sklearn.datasets import fetch_20newsgroups, fetch_california_housing
Expand Down Expand Up @@ -275,20 +275,72 @@ def pytest_pyfunc_call(pyfuncitem):
pytest.skip("Test requires cudf.pandas accelerator")


def _get_gpu_memory():
    """Report the largest total GPU memory listed by ``nvidia-smi``, in GB.

    The command is asked for ``memory.total`` in CSV form. Its first output
    line (the header) is dropped, and the leading integer of every remaining
    line that splits into more than one space-separated token is collected.
    The largest collected value is divided by 1024 and rounded up; this
    presumably converts MiB to GB (confirm against the nvidia-smi output).

    Returns
    -------
    int
        The largest per-GPU value divided by 1024, rounded up.

    Raises
    ------
    subprocess.CalledProcessError
        If the shell command exits with a non-zero status.
    IndexError
        If no line of the output yields a memory value.
    """
    bash_command = "nvidia-smi --query-gpu=memory.total --format=csv"
    raw_output = subprocess.check_output(bash_command, shell=True)

    # Everything after the first line is data; the first line is the header.
    data_rows = raw_output.decode("utf-8").split("\n")[1:]
    row_tokens = (row.split(" ") for row in data_rows)

    # Rows with a single token (e.g. the trailing empty line) carry no value.
    totals = sorted(int(tokens[0]) for tokens in row_tokens if len(tokens) > 1)
    return ceil(totals[-1] / 1024)
def _get_pynvml_device_handle(device_id=0):
    """Get a pynvml GPU handle from a device index or UUID.

    Parameters
    ----------
    device_id: int or str
        The index or UUID of the device from which to obtain the handle.
        A string made only of digits (e.g. ``"0"``) is treated as an index;
        any other string is treated as a UUID.

    Raises
    ------
    ValueError
        If acquiring the device handle for the device specified failed. The
        underlying ``pynvml.NVMLError`` is attached as ``__cause__``.
    pynvml.NVMLError
        If any NVML error occurred while initializing.

    Returns
    -------
    A pynvml handle to the device. If the UUID refers to a MIG instance, the
    handle of its parent device is returned instead.

    Examples
    --------
    >>> _get_pynvml_device_handle(device_id=0)  # doctest: +SKIP

    >>> _get_pynvml_device_handle(
    ...     device_id="GPU-9fb42d6f-7d6b-368f-f79c-3c3e784c93f6"
    ... )  # doctest: +SKIP
    """
    # Deliberately outside the ``try``: initialization failures propagate as
    # NVMLError rather than being reported as an invalid device.
    pynvml.nvmlInit()

    try:
        # Only genuine strings can be UUIDs. Checking the type (rather than
        # ``str(device_id)``) keeps values such as ``-1`` on the index path,
        # where NVML rejects them and the ValueError below is raised.
        if isinstance(device_id, str) and not device_id.isnumeric():
            # This works for both MIG and non-MIG device UUIDs.
            handle = pynvml.nvmlDeviceGetHandleByUUID(device_id.encode())
            if pynvml.nvmlDeviceIsMigDeviceHandle(handle):
                # Additionally get the parent device handle if the device
                # itself is a MIG instance.
                handle = pynvml.nvmlDeviceGetDeviceHandleFromMigDeviceHandle(
                    handle
                )
            return handle

        # Numeric strings are converted so the index lookup gets an int.
        return pynvml.nvmlDeviceGetHandleByIndex(int(device_id))
    except pynvml.NVMLError as err:
        # Chain the NVML error so the root cause stays visible in tracebacks.
        raise ValueError(
            f"Invalid device index or UUID: {device_id}"
        ) from err
Comment on lines +305 to +320
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fairly complicated for what appears to be a rather basic function. Is this really the recommended approach for this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For general support, yes, but I presume this is for CI only, so not all of it is necessarily required. However, this is a verbatim copy from Dask-CUDA, which is probably the only place this function is tested, so I think it makes sense to keep a verbatim copy here, as it will be less of a headache for you.

In the long term, I'd like to have those functions in some shared package so that all RAPIDS projects can piggyback on it instead of copying them verbatim. I've been pushing for that for 2 years, but it has been really hard to convince our management of its value; perhaps now that we have similar functions copied in something like 50 different places, its value will finally become obvious. @quasiben



def _get_gpu_memory(device_index=0):
    """Return the total memory of a CUDA device given its index or UUID.

    Parameters
    ----------
    device_index: int or str
        The index or UUID of the device whose total memory is queried.

    Returns
    -------
    The total memory of the CUDA device in GB (the value reported by NVML
    divided by ``2**30`` and rounded up), or ``None`` for devices that do not
    have a dedicated memory resource, as is usually the case for system on a
    chip (SoC) devices.

    Raises
    ------
    ValueError
        If no handle could be obtained for ``device_index``.
    """
    handle = _get_pynvml_device_handle(device_index)

    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    except pynvml.NVMLError_NotSupported:
        # NVML cannot report memory for this device; callers treat ``None``
        # as "unknown".
        return None

    # Return total memory in GB
    return ceil(memory_info.total / 2**30)


# =============================================================================
Expand Down
8 changes: 5 additions & 3 deletions python/cuml/tests/dask/test_dask_tsvd.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@
)
@pytest.mark.parametrize("input_type", ["dataframe", "array"])
def test_pca_fit(data_info, input_type, client):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

nrows, ncols, n_parts = data_info
if nrows == int(9e6) and pytest.max_gpu_memory < 48:
if nrows == int(9e6) and max_gpu_memory < 48:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 256
ncols = ncols * pytest.max_gpu_memory // 256
nrows = nrows * max_gpu_memory // 256
ncols = ncols * max_gpu_memory // 256
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down
14 changes: 10 additions & 4 deletions python/cuml/tests/test_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,15 @@ def test_dbscan(
out_dtype,
algorithm,
):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if algorithm == "rbc":
if datatype == np.float64 or out_dtype in ["int32", np.int32]:
pytest.skip("RBC does not support float64 dtype or int32 labels")
if nrows == 500000 and pytest.max_gpu_memory < 32:
if nrows == 500000 and max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
nrows = nrows * max_gpu_memory // 32
else:
pytest.skip(
"Insufficient GPU memory for this test. "
Expand Down Expand Up @@ -213,9 +216,12 @@ def test_dbscan_cosine(nrows, max_mbytes_per_batch, out_dtype):
# Vary the eps to get a range of core point counts
@pytest.mark.parametrize("eps", [0.05, 0.1, 0.5])
def test_dbscan_sklearn_comparison(name, nrows, eps):
if nrows == 500000 and name == "blobs" and pytest.max_gpu_memory < 32:
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if nrows == 500000 and name == "blobs" and max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
nrows = nrows * max_gpu_memory // 32
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down
7 changes: 5 additions & 2 deletions python/cuml/tests/test_lars.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,13 @@ def test_lars_model(datatype, nrows, column_info, precompute):
)
@pytest.mark.parametrize("precompute", [True, False])
def test_lars_collinear(datatype, nrows, column_info, precompute):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

ncols, n_info = column_info
if nrows == 500000 and ncols == 1000 and pytest.max_gpu_memory < 32:
if nrows == 500000 and ncols == 1000 and max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
nrows = nrows * max_gpu_memory // 32
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down
11 changes: 5 additions & 6 deletions python/cuml/tests/test_mbsgd_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,12 @@
)
def make_dataset(request):
nrows, ncols, n_info, datatype = request.param
if (
nrows == 500000
and datatype == np.float64
and pytest.max_gpu_memory < 32
):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if nrows == 500000 and datatype == np.float64 and max_gpu_memory < 32:
if pytest.adapt_stress_test:
nrows = nrows * pytest.max_gpu_memory // 32
nrows = nrows * max_gpu_memory // 32
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down
21 changes: 15 additions & 6 deletions python/cuml/tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,13 @@ def test_pca_defaults(n_samples, n_features, sparse):
"name", [unit_param(None), quality_param("iris"), stress_param("blobs")]
)
def test_pca_fit_then_transform(datatype, input_type, name, use_handle):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

blobs_n_samples = 500000
if name == "blobs" and pytest.max_gpu_memory < 32:
if name == "blobs" and max_gpu_memory < 32:
if pytest.adapt_stress_test:
blobs_n_samples = int(blobs_n_samples * pytest.max_gpu_memory / 32)
blobs_n_samples = int(blobs_n_samples * max_gpu_memory / 32)
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down Expand Up @@ -193,11 +196,14 @@ def test_pca_fit_then_transform(datatype, input_type, name, use_handle):
"name", [unit_param(None), quality_param("iris"), stress_param("blobs")]
)
def test_pca_fit_transform(datatype, input_type, name, use_handle):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

blobs_n_samples = 500000

if name == "blobs" and pytest.max_gpu_memory < 32:
if name == "blobs" and max_gpu_memory < 32:
if pytest.adapt_stress_test:
blobs_n_samples = int(blobs_n_samples * pytest.max_gpu_memory / 32)
blobs_n_samples = int(blobs_n_samples * max_gpu_memory / 32)
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down Expand Up @@ -273,9 +279,12 @@ def test_pca_inverse_transform(datatype, input_type, name, use_handle, nrows):
@pytest.mark.parametrize("return_sparse", [True, False])
@pytest.mark.parametrize("cupy_input", [True, False])
def test_sparse_pca_inputs(nrows, ncols, whiten, return_sparse, cupy_input):
if ncols == 20000 and pytest.max_gpu_memory < 48:
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if ncols == 20000 and max_gpu_memory < 48:
if pytest.adapt_stress_test:
ncols = int(ncols * pytest.max_gpu_memory / 48)
ncols = int(ncols * max_gpu_memory / 48)
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down
32 changes: 22 additions & 10 deletions python/cuml/tests/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,16 +224,19 @@ def assert_model(pickled_model, X_test):
)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_regressor_pickle(tmpdir, datatype, keys, data_size, fit_intercept):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if (
data_size[0] == 500000
and datatype == np.float64
and ("LogisticRegression" in keys or "Ridge" in keys)
and pytest.max_gpu_memory < 32
and max_gpu_memory < 32
):
if pytest.adapt_stress_test:
data_size[0] = data_size[0] * pytest.max_gpu_memory // 640
data_size[1] = data_size[1] * pytest.max_gpu_memory // 640
data_size[2] = data_size[2] * pytest.max_gpu_memory // 640
data_size[0] = data_size[0] * max_gpu_memory // 640
data_size[1] = data_size[1] * max_gpu_memory // 640
data_size[2] = data_size[2] * max_gpu_memory // 640
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down Expand Up @@ -437,13 +440,16 @@ def test_unfit_clone(model_name):
[unit_param([500, 20, 10, 5]), stress_param([500000, 1000, 500, 50])],
)
def test_neighbors_pickle(tmpdir, datatype, keys, data_info):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if (
data_info[0] == 500000
and pytest.max_gpu_memory < 32
and max_gpu_memory < 32
and ("KNeighborsClassifier" in keys or "KNeighborsRegressor" in keys)
):
if pytest.adapt_stress_test:
data_info[0] = data_info[0] * pytest.max_gpu_memory // 32
data_info[0] = data_info[0] * max_gpu_memory // 32
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down Expand Up @@ -486,13 +492,16 @@ def assert_model(pickled_model, X_test):
)
@pytest.mark.parametrize("keys", k_neighbors_models.keys())
def test_k_neighbors_classifier_pickle(tmpdir, datatype, data_info, keys):
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if (
data_info[0] == 500000
and "NearestNeighbors" in keys
and pytest.max_gpu_memory < 32
and max_gpu_memory < 32
):
if pytest.adapt_stress_test:
data_info[0] = data_info[0] * pytest.max_gpu_memory // 32
data_info[0] = data_info[0] * max_gpu_memory // 32
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down Expand Up @@ -561,9 +570,12 @@ def assert_model(loaded_model, X):
"data_size", [unit_param([500, 20, 10]), stress_param([500000, 1000, 500])]
)
def test_dbscan_pickle(tmpdir, datatype, keys, data_size):
if data_size[0] == 500000 and pytest.max_gpu_memory < 32:
# Assume at least 4GB memory
max_gpu_memory = pytest.max_gpu_memory or 4

if data_size[0] == 500000 and max_gpu_memory < 32:
if pytest.adapt_stress_test:
data_size[0] = data_size[0] * pytest.max_gpu_memory // 32
data_size[0] = data_size[0] * max_gpu_memory // 32
else:
pytest.skip(
"Insufficient GPU memory for this test."
Expand Down