Skip to content
6 changes: 6 additions & 0 deletions python/cudf/benchmarks/API/bench_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,3 +349,9 @@ def bench_nsmallest(benchmark, dataframe, num_cols_to_sort, n):
)
def bench_where(benchmark, dataframe, cond, other):
benchmark(dataframe.where, cond, other)


@benchmark_with_object(cls="dataframe", dtype="float", nulls=False, cols=20)
@pytest.mark.pandas_incompatible
def bench_to_cupy(benchmark, dataframe):
    """Benchmark ``DataFrame.to_cupy``.

    The decorator requests a float dataframe with 20 columns and no nulls.
    NOTE(review): the ``pandas_incompatible`` mark presumably excludes this
    benchmark from pandas runs, since ``to_cupy`` is a cudf-only method --
    confirm against the benchmark configuration.
    """
    to_cupy = dataframe.to_cupy
    benchmark(to_cupy)
6 changes: 3 additions & 3 deletions python/cudf/benchmarks/common/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.

"""Module used for global configuration of benchmarks.

Expand Down Expand Up @@ -64,7 +64,7 @@ def pytest_sessionfinish(session, exitstatus):
# Constants used to define benchmarking standards.
if "CUDF_BENCHMARKS_DEBUG_ONLY" in os.environ:
    # Debug-only runs use very small row counts.
    NUM_ROWS = [10, 20]
    NUM_COLS = [1, 6, 20]
else:
    NUM_ROWS = [100, 10_000, 1_000_000]
    NUM_COLS = [1, 6, 20]
55 changes: 55 additions & 0 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import TYPE_CHECKING, Any, Literal

import cupy
import cupy as cp
import numpy
import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -524,6 +525,15 @@ def to_array(
matrix[:, i] = to_array(col, dtype)
return matrix

@_performance_tracking
def to_pylibcudf(self) -> tuple[plc.Table, dict[str, Any]]:
    """
    Convert this Frame to its pylibcudf representation.

    The base implementation performs no conversion: it always raises,
    so it should not be called directly on a Frame object. Subclasses
    like DataFrame/Series are expected to provide the real conversion.

    Raises
    ------
    NotImplementedError
        Always; the message names the type that lacks an override.
    """
    message = f"{type(self)} must implement to_pylibcudf"
    raise NotImplementedError(message)

@_performance_tracking
def to_cupy(
self,
Expand All @@ -550,6 +560,51 @@ def to_cupy(
-------
cupy.ndarray
"""
if (
self._num_columns > 1
and na_value is None
and self._columns[0].dtype.kind in {"i", "u", "f", "b"}
and all(
not col.nullable and col.dtype == self._columns[0].dtype
for col in self._columns
)
):
if dtype is None:
dtype = np.dtype(self._columns[0].dtype)

shape = (len(self), self._num_columns)
out = cupy.empty(shape, dtype=dtype, order="F")

table = self.to_pylibcudf()[0]
if isinstance(table, plc.Column):
table = plc.Table([table])
plc.reshape.table_to_array(
table,
out.data.ptr,
out.nbytes,
)
return out
elif self._num_columns == 1:
col = self._columns[0]
final_dtype = col.dtype if dtype is None else dtype

if (
not copy
and col.dtype.kind in {"i", "u", "f", "b"}
and cp.can_cast(col.dtype, final_dtype)
):
if col.has_nulls():
if na_value is not None:
col = col.fillna(na_value)
else:
return self._to_array(
lambda col: col.values,
cupy,
copy,
dtype,
na_value,
)
return cp.asarray(col, dtype=final_dtype).reshape((-1, 1))
return self._to_array(
lambda col: col.values,
cupy,
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ def to_cupy(
not copy
and col.dtype.kind in {"i", "u", "f", "b"}
and cp.can_cast(col.dtype, final_dtype)
and not col.has_nulls()
):
if col.has_nulls():
if na_value is not None:
Expand Down
28 changes: 28 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,34 @@ def test_dataframe_to_cupy():
np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i])


@pytest.mark.parametrize("has_nulls", [False, True])
@pytest.mark.parametrize("use_na_value", [False, True])
def test_dataframe_to_cupy_single_column(has_nulls, use_na_value):
    """Check ``DataFrame.to_cupy`` on a one-column float64 frame.

    Covers every combination of "column contains nulls" and "caller
    supplies ``na_value``". Nulls without a fill value must raise; all
    other cases must produce an ``(nelem, 1)`` array equal to the
    (optionally filled) column.
    """
    nelem = 10
    values = np.arange(nelem, dtype=np.float64)
    if has_nulls:
        # Switch to object dtype so every other slot can hold None.
        values = values.astype("object")
        values[::2] = None

    frame = cudf.DataFrame({"a": values})

    if has_nulls and not use_na_value:
        # Nulls with no replacement value are expected to be rejected.
        with pytest.raises(ValueError, match="Column must have no nulls"):
            frame.to_cupy()
        return

    fill = 0.0 if use_na_value else None
    column = frame["a"]
    if has_nulls:
        column = column.fillna(fill)
    expected = cupy.asarray(column)

    result = frame.to_cupy(na_value=fill)
    assert result.shape == (nelem, 1)
    assert_eq(result.ravel(), expected)


def test_dataframe_to_cupy_null_values():
df = cudf.DataFrame()

Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3091,6 +3091,7 @@ def test_series_to_cupy(dtype, has_nulls, use_na_value):

if not has_nulls:
assert_eq(sr.values, cp.asarray(sr))
return

if has_nulls and not use_na_value:
with pytest.raises(ValueError, match="Column must have no nulls"):
Expand Down
16 changes: 14 additions & 2 deletions python/pylibcudf/pylibcudf/libcudf/reshape.pxd
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr
from pylibcudf.exception_handler cimport libcudf_exception_handler
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport size_type
from pylibcudf.libcudf.types cimport size_type, data_type
from pylibcudf.libcudf.utilities.span cimport device_span

from rmm.librmm.cuda_stream_view cimport cuda_stream_view

# Opaque declaration of cuda::std::byte (no members are exposed). It is used
# below as the element type of the device_span taken by table_to_array.
cdef extern from "cuda/functional" namespace "cuda::std":
cdef cppclass byte:
pass


cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
Expand All @@ -14,3 +21,8 @@ cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
cdef unique_ptr[table] tile(
table_view source_table, size_type count
) except +libcudf_exception_handler
cdef void table_to_array(
table_view input_table,
device_span[byte] output,
cuda_stream_view stream
) except +libcudf_exception_handler
14 changes: 13 additions & 1 deletion python/pylibcudf/pylibcudf/reshape.pxd
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from libc.stdint cimport uintptr_t

from pylibcudf.libcudf.types cimport size_type

from rmm.pylibrmm.stream cimport Stream
from rmm.pylibrmm.device_buffer cimport DeviceBuffer

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table
from .types cimport DataType


cpdef Column interleave_columns(Table source_table)
cpdef Table tile(Table source_table, size_type count)
# Copy ``input_table`` into the device buffer that starts at ``ptr`` and is
# ``size`` bytes long. ``stream`` is optional: ``=*`` means the default value
# is supplied by the implementation in the .pyx file.
cpdef void table_to_array(
Table input_table,
uintptr_t ptr,
size_type size,
Stream stream=*
)
8 changes: 8 additions & 0 deletions python/pylibcudf/pylibcudf/reshape.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from rmm.pylibrmm.stream import Stream

from pylibcudf.column import Column
from pylibcudf.table import Table

def interleave_columns(source_table: Table) -> Column: ...
def tile(source_table: Table, count: int) -> Table: ...
# ``stream`` is optional: the implementation defaults it to ``None``.
def table_to_array(
    input_table: Table,
    ptr: int,
    size: int,
    stream: Stream | None = None,
) -> None: ...
51 changes: 49 additions & 2 deletions python/pylibcudf/pylibcudf/reshape.pyx
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from libc.stdint cimport uintptr_t
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.limits cimport numeric_limits
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.reshape cimport (
interleave_columns as cpp_interleave_columns,
tile as cpp_tile,
table_to_array as cpp_table_to_array,
byte,
)
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.types cimport size_type

from pylibcudf.libcudf.utilities.span cimport device_span

from rmm.pylibrmm.stream cimport Stream

from .column cimport Column
from .table cimport Table
from .utils cimport _get_stream

__all__ = ["interleave_columns", "tile"]
__all__ = ["interleave_columns", "tile", "table_to_array"]

cpdef Column interleave_columns(Table source_table):
"""Interleave columns of a table into a single column.
Expand Down Expand Up @@ -67,3 +76,41 @@ cpdef Table tile(Table source_table, size_type count):
c_result = cpp_tile(source_table.view(), count)

return Table.from_libcudf(move(c_result))


cpdef void table_to_array(
    Table input_table,
    uintptr_t ptr,
    size_type size,
    Stream stream=None
):
    """
    Copy a table into a preallocated column-major device array.

    Parameters
    ----------
    input_table : Table
        A table with fixed-width, non-nullable columns of the same type.
    ptr : uintptr_t
        A device pointer to the beginning of the output buffer.
    size : size_type
        The total number of bytes available at `ptr`.
        Must be at least `num_rows * num_columns * sizeof(dtype)`.
    stream : Stream | None
        CUDA stream on which to perform the operation.

    Raises
    ------
    ValueError
        If `size` is negative or exceeds the int32_t limit.
    """
    # Reject a negative byte count up front rather than handing it to the
    # span constructor. NOTE(review): the span length is presumably an
    # unsigned size_t, in which case a negative value would wrap to a huge
    # length -- confirm against the device_span declaration.
    if size < 0:
        raise ValueError(
            "Size must be non-negative."
        )
    # NOTE: `size` is already typed as size_type, so this comparison cannot
    # be true once argument conversion has succeeded; it is kept purely as
    # a defensive guard.
    if size > numeric_limits[size_type].max():
        raise ValueError(
            "Size exceeds the int32_t limit."
        )
    stream = _get_stream(stream)

    # View the caller-provided buffer as a span of raw bytes.
    cdef device_span[byte] span = device_span[byte](
        <byte*> ptr, size
    )

    with nogil:
        cpp_table_to_array(
            input_table.view(),
            span,
            stream.view()
        )
42 changes: 42 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@
from utils import assert_column_eq, assert_table_eq

import pylibcudf as plc
from pylibcudf.types import TypeId


@pytest.fixture(scope="module")
def np():
    """Provide the NumPy module, skipping dependent tests if it is missing."""
    return pytest.importorskip("numpy")


@pytest.fixture(scope="module")
def cp():
    """Provide the CuPy module, skipping dependent tests if it is missing."""
    cupy_module = pytest.importorskip("cupy")
    return cupy_module


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -37,3 +48,34 @@ def test_tile(reshape_data, cnt):
)

assert_table_eq(expect, got)


@pytest.mark.parametrize(
    "dtype, type_id",
    [
        ("int32", TypeId.INT32),
        ("int64", TypeId.INT64),
        ("float32", TypeId.FLOAT32),
        ("float64", TypeId.FLOAT64),
    ],
)
def test_table_to_array(dtype, type_id, np, cp):
    """Check ``plc.reshape.table_to_array`` for several numeric dtypes.

    A two-column, three-row table is copied into a preallocated
    column-major CuPy array, which must then equal the 3x2 matrix whose
    columns are the table's columns.
    """
    arrow_type = pa.from_numpy_dtype(getattr(np, dtype))
    arrs = [
        pa.array([1, 2, 3], type=arrow_type),
        pa.array([4, 5, 6], type=arrow_type),
    ]
    arrow_tbl = pa.Table.from_arrays(arrs, names=["a", "b"])
    tbl = plc.interop.from_arrow(arrow_tbl)

    # Confirm the input columns carry the parametrized libcudf type before
    # exercising the copy.
    assert all(col.type().id() == type_id for col in tbl.columns())

    rows, cols = tbl.num_rows(), tbl.num_columns()
    # order="F" gives the column-major layout that table_to_array fills.
    got = cp.empty((rows, cols), dtype=dtype, order="F")

    plc.reshape.table_to_array(
        tbl,
        got.data.ptr,
        got.nbytes,
    )

    expect = cp.array([[1, 4], [2, 5], [3, 6]], dtype=dtype)
    cp.testing.assert_array_equal(expect, got)
Loading