Skip to content
8 changes: 8 additions & 0 deletions python/cudf/benchmarks/API/bench_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,3 +349,11 @@ def bench_nsmallest(benchmark, dataframe, num_cols_to_sort, n):
)
def bench_where(benchmark, dataframe, cond, other):
benchmark(dataframe.where, cond, other)


@benchmark_with_object(
    cls="dataframe",
    dtype="float",
    nulls=False,
    cols=20,
    rows=20,
)
@pytest.mark.pandas_incompatible
def bench_to_cupy(benchmark, dataframe):
    """Benchmark ``DataFrame.to_cupy``.

    The decorator arguments request a float dataframe without nulls,
    using 20 columns and 20 rows. The benchmark is marked
    ``pandas_incompatible``.
    """
    to_cupy = dataframe.to_cupy
    benchmark(to_cupy)
12 changes: 8 additions & 4 deletions python/cudf/benchmarks/API/bench_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ def bench_series_nsmallest(benchmark, series, n):
benchmark(series.nsmallest, n)


@benchmark_with_object(cls="series", dtype="int")
@benchmark_with_object(cls="series", dtype="int", nulls=False)
def bench_series_cp_asarray(benchmark, series):
series = series.dropna()
benchmark(cupy.asarray, series)


@benchmark_with_object(cls="series", dtype="int")
@benchmark_with_object(cls="series", dtype="int", nulls=False)
@pytest.mark.pandas_incompatible
def bench_to_cupy(benchmark, series):
    """Benchmark ``Series.to_cupy`` on an integer series without nulls.

    ``Series.values`` already has its own benchmark
    (``bench_series_values``), so this one times ``to_cupy`` itself; it is
    marked ``pandas_incompatible``.
    """
    benchmark(series.to_cupy)


@benchmark_with_object(cls="series", dtype="int", nulls=False)
def bench_series_values(benchmark, series):
series = series.dropna()
benchmark(lambda: series.values)
8 changes: 4 additions & 4 deletions python/cudf/benchmarks/common/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.

"""Module used for global configuration of benchmarks.

Expand Down Expand Up @@ -64,7 +64,7 @@ def pytest_sessionfinish(session, exitstatus):
# Constants used to define benchmarking standards.
if "CUDF_BENCHMARKS_DEBUG_ONLY" in os.environ:
NUM_ROWS = [10, 20]
NUM_COLS = [1, 6]
NUM_COLS = [1, 6, 20]
else:
NUM_ROWS = [100, 10_000, 1_000_000]
NUM_COLS = [1, 6]
NUM_ROWS = [100, 1_000, 10_000, 50_000, 1_000_000]
NUM_COLS = [1, 6, 20, 1_000, 10_000, 50_000]
9 changes: 1 addition & 8 deletions python/cudf/benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
"""

import os
import string
import sys

import pytest_cases
Expand Down Expand Up @@ -83,14 +82,8 @@ def axis(request):
for dtype, column_generator in column_generators.items():

def make_dataframe(nr, nc, column_generator=column_generator):
assert nc <= len(string.ascii_lowercase), (
"make_dataframe only supports a maximum of 26 columns"
)
return cudf.DataFrame(
{
f"{string.ascii_lowercase[i]}": column_generator(nr)
for i in range(nc)
}
{f"col{i}": column_generator(nr) for i in range(nc)}
)

for nr in NUM_ROWS:
Expand Down
54 changes: 54 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,15 @@ def to_array(
matrix[:, i] = to_array(col, dtype)
return matrix

@_performance_tracking
def to_pylibcudf(self) -> tuple[plc.Table, dict[str, Any]]:
    """
    Convert this object to a ``pylibcudf.Table``.

    This base implementation always raises. Do not call it on a bare
    ``Frame``; call it on subclasses such as DataFrame/Series instead.

    Raises
    ------
    NotImplementedError
        Always, naming the concrete type that lacks an implementation.
    """
    message = f"{type(self)} must implement to_pylibcudf"
    raise NotImplementedError(message)

@_performance_tracking
def to_cupy(
self,
Expand All @@ -550,6 +559,51 @@ def to_cupy(
-------
cupy.ndarray
"""
if (
self._num_columns > 1
and na_value is None
and self._columns[0].dtype.kind in {"i", "u", "f", "b"}
and all(
not col.nullable and col.dtype == self._columns[0].dtype
for col in self._columns
)
):
if dtype is None:
dtype = self._columns[0].dtype

shape = (len(self), self._num_columns)
out = cupy.empty(shape, dtype=dtype, order="F")

table = plc.Table(
[col.to_pylibcudf(mode="read") for col in self._columns]
)
plc.reshape.table_to_array(
table,
out.data.ptr,
out.nbytes,
)
return out
elif self._num_columns == 1:
col = self._columns[0]
final_dtype = col.dtype if dtype is None else dtype

if (
not copy
and col.dtype.kind in {"i", "u", "f", "b"}
and cupy.can_cast(col.dtype, final_dtype)
):
if col.has_nulls():
if na_value is not None:
col = col.fillna(na_value)
else:
return self._to_array(
lambda col: col.values,
cupy,
copy,
dtype,
na_value,
)
return cupy.asarray(col, dtype=final_dtype).reshape((-1, 1))
return self._to_array(
lambda col: col.values,
cupy,
Expand Down
25 changes: 5 additions & 20 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,26 +139,11 @@ def to_cupy(
-------
cupy.ndarray
"""
col = self._column
final_dtype = (
col.dtype if dtype is None else dtype
) # some types do not support | operator
if (
not copy
and col.dtype.kind in {"i", "u", "f", "b"}
and cp.can_cast(col.dtype, final_dtype)
and not col.has_nulls()
):
if col.has_nulls():
if na_value is not None:
col = col.fillna(na_value)
else:
return super().to_cupy(
dtype=dtype, copy=copy, na_value=na_value
)
return cp.asarray(col, dtype=final_dtype)

return super().to_cupy(dtype=dtype, copy=copy, na_value=na_value)
return (
super()
.to_cupy(dtype=dtype, copy=copy, na_value=na_value)
.reshape(len(self), order="F")
)

@property # type: ignore
@_performance_tracking
Expand Down
28 changes: 28 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,34 @@ def test_dataframe_to_cupy():
np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i])


@pytest.mark.parametrize("has_nulls", [False, True])
@pytest.mark.parametrize("use_na_value", [False, True])
def test_dataframe_to_cupy_single_column(has_nulls, use_na_value):
    """Check ``to_cupy`` on a one-column DataFrame.

    Covers every combination of "column has nulls" and "na_value given":
    nulls without a fill value must raise, every other case must return
    an ``(nelem, 1)`` array matching the (optionally filled) column.
    """
    n_rows = 10
    values = np.arange(n_rows, dtype=np.float64)
    if has_nulls:
        # Switch to object dtype so every other entry can be set to None.
        values = values.astype("object")
        values[::2] = None

    frame = cudf.DataFrame({"a": values})

    if has_nulls and not use_na_value:
        # Nulls without a replacement value are expected to be rejected.
        with pytest.raises(ValueError, match="Column must have no nulls"):
            frame.to_cupy()
        return

    fill = 0.0 if use_na_value else None

    column = frame["a"]
    if has_nulls:
        column = column.fillna(fill)
    expected = cupy.asarray(column)

    actual = frame.to_cupy(na_value=fill)
    assert actual.shape == (n_rows, 1)
    assert_eq(actual.ravel(), expected)


def test_dataframe_to_cupy_null_values():
df = cudf.DataFrame()

Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3091,6 +3091,7 @@ def test_series_to_cupy(dtype, has_nulls, use_na_value):

if not has_nulls:
assert_eq(sr.values, cp.asarray(sr))
return

if has_nulls and not use_na_value:
with pytest.raises(ValueError, match="Column must have no nulls"):
Expand Down
16 changes: 14 additions & 2 deletions python/pylibcudf/pylibcudf/libcudf/reshape.pxd
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr
from pylibcudf.exception_handler cimport libcudf_exception_handler
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport size_type
from pylibcudf.libcudf.types cimport size_type, data_type
from pylibcudf.libcudf.utilities.span cimport device_span

from rmm.librmm.cuda_stream_view cimport cuda_stream_view

cdef extern from "cuda/functional" namespace "cuda::std":
cdef cppclass byte:
pass


cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
Expand All @@ -14,3 +21,8 @@ cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
cdef unique_ptr[table] tile(
table_view source_table, size_type count
) except +libcudf_exception_handler
cdef void table_to_array(
table_view input_table,
device_span[byte] output,
cuda_stream_view stream
) except +libcudf_exception_handler
15 changes: 14 additions & 1 deletion python/pylibcudf/pylibcudf/reshape.pxd
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from libc.stddef cimport size_t
from libc.stdint cimport uintptr_t

from pylibcudf.libcudf.types cimport size_type

from rmm.pylibrmm.stream cimport Stream
from rmm.pylibrmm.device_buffer cimport DeviceBuffer

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table
from .types cimport DataType


cpdef Column interleave_columns(Table source_table)
cpdef Table tile(Table source_table, size_type count)
cpdef void table_to_array(
Table input_table,
uintptr_t ptr,
size_t size,
Stream stream=*
)
8 changes: 8 additions & 0 deletions python/pylibcudf/pylibcudf/reshape.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from rmm.pylibrmm.stream import Stream

from pylibcudf.column import Column
from pylibcudf.table import Table

def interleave_columns(source_table: Table) -> Column: ...
def tile(source_table: Table, count: int) -> Table: ...
# ``stream`` is optional: the implementation declares ``Stream stream=None``.
def table_to_array(
    input_table: Table,
    ptr: int,
    size: int,
    stream: Stream | None = None,
) -> None: ...
53 changes: 51 additions & 2 deletions python/pylibcudf/pylibcudf/reshape.pyx
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from libc.stddef cimport size_t
from libc.stdint cimport uintptr_t
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.limits cimport numeric_limits
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.reshape cimport (
interleave_columns as cpp_interleave_columns,
tile as cpp_tile,
table_to_array as cpp_table_to_array,
byte,
)
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.types cimport size_type

from pylibcudf.libcudf.utilities.span cimport device_span

from rmm.pylibrmm.stream cimport Stream

from .column cimport Column
from .table cimport Table
from .utils cimport _get_stream

__all__ = ["interleave_columns", "tile"]
__all__ = ["interleave_columns", "tile", "table_to_array"]

cpdef Column interleave_columns(Table source_table):
"""Interleave columns of a table into a single column.
Expand Down Expand Up @@ -67,3 +77,42 @@ cpdef Table tile(Table source_table, size_type count):
c_result = cpp_tile(source_table.view(), count)

return Table.from_libcudf(move(c_result))


cpdef void table_to_array(
    Table input_table,
    uintptr_t ptr,
    size_t size,
    Stream stream=None
):
    """
    Copy a table into a preallocated column-major device array.

    Parameters
    ----------
    input_table : Table
        A table with fixed-width, non-nullable columns of the same type.
    ptr : uintptr_t
        A device pointer to the beginning of the output buffer.
    size : size_t
        The total number of bytes available at `ptr`.
        Must be at least `num_rows * num_columns * sizeof(dtype)`.
    stream : Stream | None
        CUDA stream on which to perform the operation.
    """
    # No explicit upper-bound check on ``size``: it is declared ``size_t``,
    # so it can never exceed ``numeric_limits[size_t].max()``, and the byte
    # span below takes its length as-is.
    stream = _get_stream(stream)

    # Wrap the raw pointer and byte count in the span type that the libcudf
    # declaration of ``table_to_array`` expects.
    cdef device_span[byte] span = device_span[byte](
        <byte*> ptr, size
    )

    with nogil:
        cpp_table_to_array(
            input_table.view(),
            span,
            stream.view()
        )
Loading
Loading