Skip to content

Commit 7694248

Browse files
authored
Add fast paths for DataFrame.to_cupy (#18801)
Contributes to #16483 by adding fast paths to `DataFrame.to_cupy` (which is called when `DataFrame.values` is called). The PR follows up #18450 to add Cython bindings for `cudf::table_to_array` to pylibcudf and plumbs those changes through cudf classic. I benchmarked the fast (True) and slow (False) paths when the DataFrame has 1, 6, 20, and 100 columns. The fast paths use `cudf::table_to_array` if the number of columns is greater than 1 and `cp.asarray` directly if the DataFrame has only one column. The slow path uses a [raw Python loop + assignment](https://github.com/rapidsai/cudf/blob/35d58394e7fb5a090ff3cda351403ec092476af5/python/cudf/cudf/core/frame.py#L520) to create the cupy array. ![image](https://github.com/user-attachments/assets/4c9edfa0-e15d-4902-b597-675cfb02343d) I used the median because the CUDA overhead of the first call to `cudf::table_to_array` is large (so there are outliers in the timings). Here is a profile of calling `to_cupy` twice for both the slow and fast paths. ![Screenshot from 2025-05-13 12-23-46](https://github.com/user-attachments/assets/d84fdfa3-3696-4df8-91b6-3eb9dde65430) In the first calls, the fast path takes 7.3 ms vs 4.8 ms for the slow path. The first call to `cudf::table_to_array` is the bottleneck. But if you compare the second calls, the fast path is much faster (79 us vs 2.3 ms). Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Bradley Dice (https://github.com/bdice) - Matthew Roeschke (https://github.com/mroeschke) URL: #18801
1 parent 9c6f5f0 commit 7694248

File tree

16 files changed

+306
-32
lines changed

16 files changed

+306
-32
lines changed

docs/cudf/source/user_guide/api_docs/index_objects.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ Conversion
110110
Index.to_frame
111111
Index.to_pandas
112112
Index.to_dlpack
113+
Index.to_pylibcudf
114+
Index.from_pylibcudf
113115
Index.from_pandas
114116
Index.from_arrow
115117

python/cudf/benchmarks/API/bench_dataframe.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,3 +349,11 @@ def bench_nsmallest(benchmark, dataframe, num_cols_to_sort, n):
349349
)
350350
def bench_where(benchmark, dataframe, cond, other):
351351
benchmark(dataframe.where, cond, other)
352+
353+
354+
@benchmark_with_object(
    cls="dataframe", dtype="float", nulls=False, cols=20, rows=20
)
@pytest.mark.pandas_incompatible
def bench_to_cupy(benchmark, dataframe):
    """Benchmark ``DataFrame.to_cupy`` with default arguments.

    The fixture is requested with ``dtype="float"``, ``nulls=False``,
    ``cols=20`` and ``rows=20``.
    """
    # Pass the bound method itself so only the conversion call is timed.
    benchmark(dataframe.to_cupy)

python/cudf/benchmarks/API/bench_series.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@ def bench_series_nsmallest(benchmark, series, n):
2323
benchmark(series.nsmallest, n)
2424

2525

26-
@benchmark_with_object(cls="series", dtype="int")
26+
@benchmark_with_object(cls="series", dtype="int", nulls=False)
2727
def bench_series_cp_asarray(benchmark, series):
28-
series = series.dropna()
2928
benchmark(cupy.asarray, series)
3029

3130

32-
@benchmark_with_object(cls="series", dtype="int")
31+
@benchmark_with_object(cls="series", dtype="int", nulls=False)
@pytest.mark.pandas_incompatible
def bench_to_cupy(benchmark, series):
    """Benchmark ``Series.to_cupy`` with default arguments.

    The fixture is requested with ``dtype="int"`` and ``nulls=False``.
    ``to_cupy`` is called directly rather than through the ``.values``
    property, which ``bench_series_values`` already times; this keeps the
    two benchmarks from measuring the same expression.
    """
    benchmark(series.to_cupy)
35+
36+
37+
@benchmark_with_object(cls="series", dtype="int", nulls=False)
3338
def bench_series_values(benchmark, series):
34-
series = series.dropna()
3539
benchmark(lambda: series.values)

python/cudf/benchmarks/common/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
22

33
"""Module used for global configuration of benchmarks.
44
@@ -64,7 +64,7 @@ def pytest_sessionfinish(session, exitstatus):
6464
# Constants used to define benchmarking standards.
6565
if "CUDF_BENCHMARKS_DEBUG_ONLY" in os.environ:
6666
NUM_ROWS = [10, 20]
67-
NUM_COLS = [1, 6]
67+
NUM_COLS = [1, 6, 20]
6868
else:
6969
NUM_ROWS = [100, 10_000, 1_000_000]
70-
NUM_COLS = [1, 6]
70+
NUM_COLS = [1, 6, 20]

python/cudf/cudf/core/frame.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,15 @@ def to_array(
587587
matrix[:, i] = to_array(col, dtype)
588588
return matrix
589589

590+
@_performance_tracking
def to_pylibcudf(self) -> tuple[plc.Table, dict[str, Any]]:
    """
    Convert this Frame to a pylibcudf.Table.

    This base implementation only raises. It should not be called
    directly on a Frame object; it should be called on subclasses
    like DataFrame/Series instead.

    Returns
    -------
    tuple[pylibcudf.Table, dict[str, Any]]
        Declared return shape for overriding implementations: the
        table plus a dict (presumably metadata -- confirm against the
        subclass overrides).

    Raises
    ------
    NotImplementedError
        Always; the message includes ``type(self)``.
    """
    raise NotImplementedError(f"{type(self)} must implement to_pylibcudf")
598+
590599
@_performance_tracking
591600
def to_cupy(
592601
self,
@@ -613,6 +622,51 @@ def to_cupy(
613622
-------
614623
cupy.ndarray
615624
"""
625+
if (
626+
self._num_columns > 1
627+
and na_value is None
628+
and self._columns[0].dtype.kind in {"i", "u", "f", "b"}
629+
and all(
630+
not col.nullable and col.dtype == self._columns[0].dtype
631+
for col in self._columns
632+
)
633+
):
634+
if dtype is None:
635+
dtype = self._columns[0].dtype
636+
637+
shape = (len(self), self._num_columns)
638+
out = cupy.empty(shape, dtype=dtype, order="F")
639+
640+
table = plc.Table(
641+
[col.to_pylibcudf(mode="read") for col in self._columns]
642+
)
643+
plc.reshape.table_to_array(
644+
table,
645+
out.data.ptr,
646+
out.nbytes,
647+
)
648+
return out
649+
elif self._num_columns == 1:
650+
col = self._columns[0]
651+
final_dtype = col.dtype if dtype is None else dtype
652+
653+
if (
654+
not copy
655+
and col.dtype.kind in {"i", "u", "f", "b"}
656+
and cupy.can_cast(col.dtype, final_dtype)
657+
):
658+
if col.has_nulls():
659+
if na_value is not None:
660+
col = col.fillna(na_value)
661+
else:
662+
return self._to_array(
663+
lambda col: col.values,
664+
cupy,
665+
copy,
666+
dtype,
667+
na_value,
668+
)
669+
return cupy.asarray(col, dtype=final_dtype).reshape((-1, 1))
616670
return self._to_array(
617671
lambda col: col.values,
618672
cupy,

python/cudf/cudf/core/index.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,74 @@ def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self:
323323
def _from_data_like_self(self, data: MutableMapping) -> Self:
324324
return _index_from_data(data, self.name)
325325

326+
@_performance_tracking
def to_pylibcudf(self, copy=False) -> tuple[plc.Column, dict]:
    """
    Expose this Index as a pylibcudf.Column together with its metadata.

    Parameters
    ----------
    copy : bool, default False
        Whether or not to generate a new copy of the underlying device
        data. Only ``False`` is currently supported.

    Returns
    -------
    pylibcudf.Column
        A new pylibcudf.Column referencing the same data.
    dict
        Dict of metadata (holds the Index name under ``"name"``).

    Raises
    ------
    NotImplementedError
        If ``copy`` is truthy.

    Notes
    -----
    User requests to convert to pylibcudf must assume that the
    data may be modified afterwards.
    """
    if copy:
        raise NotImplementedError("copy=True is not supported")
    index_metadata = dict(name=self.name)
    # The column is requested in "write" mode (see Notes).
    plc_column = self._column.to_pylibcudf(mode="write")
    return plc_column, index_metadata
352+
353+
@classmethod
354+
@_performance_tracking
355+
def from_pylibcudf(
356+
cls, col: plc.Column, metadata: dict | None = None
357+
) -> Self:
358+
"""
359+
Create a Index from a pylibcudf.Column.
360+
361+
Parameters
362+
----------
363+
col : pylibcudf.Column
364+
The input Column.
365+
366+
Returns
367+
-------
368+
pylibcudf.Column
369+
A new pylibcudf.Column referencing the same data.
370+
metadata : dict | None
371+
The Index metadata.
372+
373+
Notes
374+
-----
375+
This function will generate an Index which contains a Column
376+
pointing to the provided pylibcudf Column. It will directly access
377+
the data and mask buffers of the pylibcudf Column, so the newly created
378+
object is not tied to the lifetime of the original pylibcudf.Column.
379+
"""
380+
name = None
381+
if metadata is not None:
382+
if not (
383+
isinstance(metadata, dict)
384+
and len(metadata) == 1
385+
and set(metadata) == {"name"}
386+
):
387+
raise ValueError("Metadata dict must only contain a name")
388+
name = metadata.get("name")
389+
return cls._from_column(
390+
ColumnBase.from_pylibcudf(col, data_ptr_exposed=True),
391+
name=name,
392+
)
393+
326394
@classmethod
327395
@_performance_tracking
328396
def from_arrow(cls, obj: pa.Array) -> Index | cudf.MultiIndex:

python/cudf/cudf/core/series.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3820,6 +3820,8 @@ def from_pylibcudf(
38203820
----------
38213821
col : pylibcudf.Column
38223822
The input Column.
3823+
metadata : dict | None
3824+
The Series metadata.
38233825
38243826
Returns
38253827
-------

python/cudf/cudf/core/single_column_frame.py

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -139,26 +139,11 @@ def to_cupy(
139139
-------
140140
cupy.ndarray
141141
"""
142-
col = self._column
143-
final_dtype = (
144-
col.dtype if dtype is None else dtype
145-
) # some types do not support | operator
146-
if (
147-
not copy
148-
and col.dtype.kind in {"i", "u", "f", "b"}
149-
and cp.can_cast(col.dtype, final_dtype)
150-
and not col.has_nulls()
151-
):
152-
if col.has_nulls():
153-
if na_value is not None:
154-
col = col.fillna(na_value)
155-
else:
156-
return super().to_cupy(
157-
dtype=dtype, copy=copy, na_value=na_value
158-
)
159-
return cp.asarray(col, dtype=final_dtype)
160-
161-
return super().to_cupy(dtype=dtype, copy=copy, na_value=na_value)
142+
return (
143+
super()
144+
.to_cupy(dtype=dtype, copy=copy, na_value=na_value)
145+
.reshape(len(self), order="F")
146+
)
162147

163148
@property # type: ignore
164149
@_performance_tracking

python/cudf/cudf/tests/test_dataframe.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,6 +1270,34 @@ def test_dataframe_to_cupy():
12701270
np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i])
12711271

12721272

1273+
@pytest.mark.parametrize("has_nulls", [False, True])
@pytest.mark.parametrize("use_na_value", [False, True])
def test_dataframe_to_cupy_single_column(has_nulls, use_na_value):
    """Check ``to_cupy`` on a one-column DataFrame.

    The result must have shape ``(nelem, 1)``; a column with nulls and no
    ``na_value`` must raise ``ValueError``.
    """
    nelem = 10
    values = np.arange(nelem, dtype=np.float64)
    if has_nulls:
        # Switch to object dtype so every other slot can hold None.
        values = values.astype("object")
        values[::2] = None
    frame = cudf.DataFrame({"a": values})

    if has_nulls and not use_na_value:
        with pytest.raises(ValueError, match="Column must have no nulls"):
            frame.to_cupy()
        return

    if use_na_value:
        na_value = 0.0
    else:
        na_value = None

    column = frame["a"]
    if has_nulls:
        column = column.fillna(na_value)
    expected = cupy.asarray(column)

    result = frame.to_cupy(na_value=na_value)
    assert result.shape == (nelem, 1)
    assert_eq(result.ravel(), expected)
1299+
1300+
12731301
def test_dataframe_to_cupy_null_values():
12741302
df = cudf.DataFrame()
12751303

python/cudf/cudf/tests/test_index.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3343,3 +3343,10 @@ def test_categoricalindex_from_codes(ordered, name):
33433343
name=name,
33443344
)
33453345
assert_eq(result, expected)
3346+
3347+
3348+
def test_roundtrip_index_plc_column():
    """An Index must compare equal after to_pylibcudf -> from_pylibcudf."""
    expect = cudf.Index(cudf.Index([1]))
    plc_column, metadata = expect.to_pylibcudf()
    actual = cudf.Index.from_pylibcudf(plc_column, metadata)
    assert_eq(expect, actual)

0 commit comments

Comments
 (0)