rapidsai · rapids-bot · May 15, 2025 · May 13, 2025 · May 13, 2025 · May 13, 2025
@@ -349,3 +349,11 @@ def bench_nsmallest(benchmark, dataframe, num_cols_to_sort, n):
 )
 def bench_where(benchmark, dataframe, cond, other):
     benchmark(dataframe.where, cond, other)
+
+
+@benchmark_with_object(
+    cls="dataframe", dtype="float", nulls=False, cols=20, rows=20
+)
+@pytest.mark.pandas_incompatible
+def bench_to_cupy(benchmark, dataframe):
+    benchmark(dataframe.to_cupy)
@@ -23,13 +23,17 @@ def bench_series_nsmallest(benchmark, series, n):
     benchmark(series.nsmallest, n)
 
 
-@benchmark_with_object(cls="series", dtype="int")
+@benchmark_with_object(cls="series", dtype="int", nulls=False)
 def bench_series_cp_asarray(benchmark, series):
-    series = series.dropna()
+    # The fixture is requested with nulls=False, which presumably makes the
+    # removed dropna() call unnecessary before calling cupy.asarray.
     benchmark(cupy.asarray, series)
 
 
-@benchmark_with_object(cls="series", dtype="int")
+@benchmark_with_object(cls="series", dtype="int", nulls=False)
+@pytest.mark.pandas_incompatible
+def bench_to_cupy(benchmark, series):
+    benchmark(lambda: series.values)
+
+
+@benchmark_with_object(cls="series", dtype="int", nulls=False)
 def bench_series_values(benchmark, series):
-    series = series.dropna()
+    # nulls=False is requested from the fixture, which presumably makes the
+    # removed dropna() call unnecessary.
     benchmark(lambda: series.values)
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 """Module used for global configuration of benchmarks.
 
@@ -64,7 +64,7 @@ def pytest_sessionfinish(session, exitstatus):
 # Constants used to define benchmarking standards.
+# NUM_ROWS / NUM_COLS list the row and column counts used by the benchmarks.
 if "CUDF_BENCHMARKS_DEBUG_ONLY" in os.environ:
+    # Reduced sizes, selected when CUDF_BENCHMARKS_DEBUG_ONLY is set.
     NUM_ROWS = [10, 20]
-    NUM_COLS = [1, 6]
+    NUM_COLS = [1, 6, 20]
 else:
+    # Sizes used when CUDF_BENCHMARKS_DEBUG_ONLY is not set.
-    NUM_ROWS = [100, 10_000, 1_000_000]
-    NUM_COLS = [1, 6]
+    NUM_ROWS = [100, 1_000, 10_000, 50_000, 1_000_000]
+    NUM_COLS = [1, 6, 20, 1_000, 10_000, 50_000]
diff --git a/python/cudf/benchmarks/conftest.py b/python/cudf/benchmarks/conftest.py
@@ -45,7 +45,6 @@
 """
 
 import os
-import string
 import sys
 
 import pytest_cases
@@ -83,14 +82,8 @@ def axis(request):
 for dtype, column_generator in column_generators.items():
 
     def make_dataframe(nr, nc, column_generator=column_generator):
+        # Build a cudf.DataFrame with `nc` columns named "col0", "col1", ...,
+        # each produced by column_generator(nr). column_generator is bound as
+        # a default argument so the function keeps the generator of the
+        # current loop iteration.
-        assert nc <= len(string.ascii_lowercase), (
-            "make_dataframe only supports a maximum of 26 columns"
-        )
         return cudf.DataFrame(
-            {
-                f"{string.ascii_lowercase[i]}": column_generator(nr)
-                for i in range(nc)
-            }
+            {f"col{i}": column_generator(nr) for i in range(nc)}
         )
 
     for nr in NUM_ROWS:

@@ -524,6 +524,15 @@ def to_array(
                 matrix[:, i] = to_array(col, dtype)
             return matrix
 
+    @_performance_tracking
+    def to_pylibcudf(self) -> tuple[plc.Table, dict[str, Any]]:
+        """
+        Converts Frame to a pylibcudf.Table.
+        Note: This method should not be called directly on a Frame object
+        Instead, it should be called on subclasses like DataFrame/Series.
+        """
+        raise NotImplementedError(f"{type(self)} must implement to_pylibcudf")
+
     @_performance_tracking
     def to_cupy(
         self,
@@ -550,6 +559,51 @@ def to_cupy(
         -------
         cupy.ndarray
         """
+        if (
+            self._num_columns > 1
+            and na_value is None
+            and self._columns[0].dtype.kind in {"i", "u", "f", "b"}
+            and all(
+                not col.nullable and col.dtype == self._columns[0].dtype
+                for col in self._columns
+            )
+        ):
+            if dtype is None:
+                dtype = self._columns[0].dtype
+
+            shape = (len(self), self._num_columns)
+            out = cupy.empty(shape, dtype=dtype, order="F")
+
+            table = plc.Table(
+                [col.to_pylibcudf(mode="read") for col in self._columns]
+            )
+            plc.reshape.table_to_array(
+                table,
+                out.data.ptr,
+                out.nbytes,
+            )
+            return out
+        elif self._num_columns == 1:
+            col = self._columns[0]
+            final_dtype = col.dtype if dtype is None else dtype
+
+            if (
+                not copy
+                and col.dtype.kind in {"i", "u", "f", "b"}
+                and cupy.can_cast(col.dtype, final_dtype)
+            ):
+                if col.has_nulls():
+                    if na_value is not None:
+                        col = col.fillna(na_value)
+                    else:
+                        return self._to_array(
+                            lambda col: col.values,
+                            cupy,
+                            copy,
+                            dtype,
+                            na_value,
+                        )
+                return cupy.asarray(col, dtype=final_dtype).reshape((-1, 1))
         return self._to_array(
             lambda col: col.values,
             cupy,

@@ -139,26 +139,11 @@ def to_cupy(
         -------
         cupy.ndarray
         """
-        col = self._column
-        final_dtype = (
-            col.dtype if dtype is None else dtype
-        )  # some types do not support | operator
-        if (
-            not copy
-            and col.dtype.kind in {"i", "u", "f", "b"}
-            and cp.can_cast(col.dtype, final_dtype)
-            and not col.has_nulls()
-        ):
-            if col.has_nulls():
-                if na_value is not None:
-                    col = col.fillna(na_value)
-                else:
-                    return super().to_cupy(
-                        dtype=dtype, copy=copy, na_value=na_value
-                    )
-            return cp.asarray(col, dtype=final_dtype)
-
-        return super().to_cupy(dtype=dtype, copy=copy, na_value=na_value)
+        return (
+            super()
+            .to_cupy(dtype=dtype, copy=copy, na_value=na_value)
+            .reshape(len(self), order="F")
+        )
 
     @property  # type: ignore
     @_performance_tracking

@@ -1270,6 +1270,34 @@ def test_dataframe_to_cupy():
         np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i])
 
 
+@pytest.mark.parametrize("has_nulls", [False, True])
+@pytest.mark.parametrize("use_na_value", [False, True])
+def test_dataframe_to_cupy_single_column(has_nulls, use_na_value):
+    nelem = 10
+    data = np.arange(nelem, dtype=np.float64)
+
+    if has_nulls:
+        data = data.astype("object")
+        data[::2] = None
+
+    df = cudf.DataFrame({"a": data})
+
+    if has_nulls and not use_na_value:
+        with pytest.raises(ValueError, match="Column must have no nulls"):
+            df.to_cupy()
+        return
+
+    na_value = 0.0 if use_na_value else None
+    expected = (
+        cupy.asarray(df["a"].fillna(na_value))
+        if has_nulls
+        else cupy.asarray(df["a"])
+    )
+    result = df.to_cupy(na_value=na_value)
+    assert result.shape == (nelem, 1)
+    assert_eq(result.ravel(), expected)
+
+
 def test_dataframe_to_cupy_null_values():
     df = cudf.DataFrame()
 

@@ -3091,6 +3091,7 @@ def test_series_to_cupy(dtype, has_nulls, use_na_value):
 
     if not has_nulls:
         assert_eq(sr.values, cp.asarray(sr))
+        return
 
     if has_nulls and not use_na_value:
         with pytest.raises(ValueError, match="Column must have no nulls"):

@@ -1,10 +1,17 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
-from pylibcudf.libcudf.types cimport size_type
+from pylibcudf.libcudf.types cimport size_type, data_type
+from pylibcudf.libcudf.utilities.span cimport device_span
+
+from rmm.librmm.cuda_stream_view cimport cuda_stream_view
+
+# Opaque declaration of cuda::std::byte; no members are exposed to Cython.
+# NOTE(review): cuda::std::byte is normally provided by <cuda/std/cstddef>;
+# confirm that "cuda/functional" reliably brings it in.
+cdef extern from "cuda/functional" namespace "cuda::std":
+    cdef cppclass byte:
+        pass
 
 
 cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
@@ -14,3 +21,8 @@ cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil:
     cdef unique_ptr[table] tile(
         table_view source_table, size_type count
     ) except +libcudf_exception_handler
+    cdef void table_to_array(
+        table_view input_table,
+        device_span[byte] output,
+        cuda_stream_view stream
+    ) except +libcudf_exception_handler
@@ -1,11 +1,24 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
+
+from libc.stddef cimport size_t
+from libc.stdint cimport uintptr_t
 
 from pylibcudf.libcudf.types cimport size_type
 
+from rmm.pylibrmm.stream cimport Stream
+from rmm.pylibrmm.device_buffer cimport DeviceBuffer
+
 from .column cimport Column
 from .scalar cimport Scalar
 from .table cimport Table
+from .types cimport DataType
 
 
 cpdef Column interleave_columns(Table source_table)
 cpdef Table tile(Table source_table, size_type count)
+# Writes `input_table` into the caller-provided buffer described by the raw
+# address `ptr` and its byte length `size`; `stream` is optional.
+cpdef void table_to_array(
+    Table input_table,
+    uintptr_t ptr,
+    size_t size,
+    Stream stream=*
+)
@@ -1,7 +1,15 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
+from rmm.pylibrmm.stream import Stream
+
 from pylibcudf.column import Column
 from pylibcudf.table import Table
 
 def interleave_columns(source_table: Table) -> Column: ...
 def tile(source_table: Table, count: int) -> Table: ...
+def table_to_array(
+    input_table: Table,
+    ptr: int,
+    size: int,
+    stream: Stream,
+) -> None: ...
@@ -1,19 +1,29 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
+from libc.stddef cimport size_t
+from libc.stdint cimport uintptr_t
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
+from libcpp.limits cimport numeric_limits
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.reshape cimport (
     interleave_columns as cpp_interleave_columns,
     tile as cpp_tile,
+    table_to_array as cpp_table_to_array,
+    byte,
 )
 from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.types cimport size_type
 
+from pylibcudf.libcudf.utilities.span cimport device_span
+
+from rmm.pylibrmm.stream cimport Stream
+
 from .column cimport Column
 from .table cimport Table
+from .utils cimport _get_stream
 
-__all__ = ["interleave_columns", "tile"]
+__all__ = ["interleave_columns", "tile", "table_to_array"]
 
 cpdef Column interleave_columns(Table source_table):
     """Interleave columns of a table into a single column.
@@ -67,3 +77,42 @@ cpdef Table tile(Table source_table, size_type count):
         c_result = cpp_tile(source_table.view(), count)
 
     return Table.from_libcudf(move(c_result))
+
+
+cpdef void table_to_array(
+    Table input_table,
+    uintptr_t ptr,
+    size_t size,
+    Stream stream=None
+):
+    """
+    Copy a table into a preallocated column-major device array.
+
+    Parameters
+    ----------
+    input_table : Table
+        A table with fixed-width, non-nullable columns of the same type.
+    ptr : uintptr_t
+        A device pointer to the beginning of the output buffer.
+    size : size_t
+        The total number of bytes available at `ptr`.
+        Must be at least `num_rows * num_columns * sizeof(dtype)`.
+    stream : Stream | None
+        CUDA stream on which to perform the operation. The argument is
+        passed through `_get_stream` before use.
+
+    Returns
+    -------
+    None
+        The output is written into the buffer starting at `ptr`.
+    """
+    # No explicit upper-bound check on `size`: the argument is already typed
+    # as size_t, so a comparison against numeric_limits[size_t].max() can
+    # never be true.
+    stream = _get_stream(stream)
+
+    # Wrap the raw address and byte count in the span type taken by libcudf.
+    cdef device_span[byte] span = device_span[byte](
+        <byte*> ptr, size
+    )
+
+    with nogil:
+        cpp_table_to_array(
+            input_table.view(),
+            span,
+            stream.view()
+        )