Skip to content

Commit 0ec0e0c

Browse files
committed
Make cudf._lib.strings_udf work with pylibcudf Columns instead of cudf._lib Columns
1 parent dc2a75c commit 0ec0e0c

File tree

3 files changed

+26
-16
lines changed

3 files changed

+26
-16
lines changed

python/cudf/cudf/_lib/strings_udf.pyx

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
22

33
from libc.stdint cimport uint8_t, uint16_t, uintptr_t
44
from pylibcudf.libcudf.strings_udf cimport (
@@ -25,15 +25,14 @@ from pylibcudf.libcudf.strings_udf cimport (
2525
from rmm.librmm.device_buffer cimport device_buffer
2626
from rmm.pylibrmm.device_buffer cimport DeviceBuffer
2727

28-
from cudf._lib.column cimport Column
2928
from pylibcudf cimport Column as plc_Column
3029

3130

3231
def get_cuda_build_version():
3332
return cpp_get_cuda_build_version()
3433

3534

36-
def column_to_string_view_array(Column strings_col):
35+
def column_to_string_view_array(plc_Column strings_col):
3736
cdef unique_ptr[device_buffer] c_buffer
3837
cdef column_view input_view = strings_col.view()
3938
with nogil:
@@ -52,9 +51,7 @@ def column_from_udf_string_array(DeviceBuffer d_buffer):
5251
c_result = move(cpp_column_from_udf_string_array(data, size))
5352
cpp_free_udf_string_array(data, size)
5453

55-
return Column.from_pylibcudf(
56-
plc_Column.from_libcudf(move(c_result))
57-
)
54+
return plc_Column.from_libcudf(move(c_result))
5855

5956

6057
def get_character_flags_table_ptr():

python/cudf/cudf/core/udf/utils.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
22
from __future__ import annotations
33

44
import functools
@@ -20,6 +20,7 @@
2020
import rmm
2121

2222
from cudf._lib import strings_udf
23+
from cudf._lib.column import Column
2324
from cudf.api.types import is_scalar
2425
from cudf.core.column.column import as_column
2526
from cudf.core.dtypes import dtype
@@ -44,6 +45,11 @@
4445
if TYPE_CHECKING:
4546
from collections.abc import Callable
4647

48+
import pylibcudf as plc
49+
50+
from cudf.core.buffer.buffer import Buffer
51+
from cudf.core.indexed_frame import IndexedFrame
52+
4753
# Maximum size of a string column is 2 GiB
4854
_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get("STRINGS_UDF_HEAP_SIZE", 2**31)
4955
_heap_size = 0
@@ -298,12 +304,14 @@ def _get_kernel(kernel_string, globals_, sig, func):
298304
return kernel
299305

300306

301-
def _get_input_args_from_frame(fr):
302-
args = []
307+
def _get_input_args_from_frame(fr: IndexedFrame) -> list:
308+
args: list[Buffer | tuple[Buffer, Buffer]] = []
303309
offsets = []
304310
for col in _supported_cols_from_frame(fr).values():
305311
if col.dtype == _cudf_str_dtype:
306-
data = column_to_string_view_array_init_heap(col)
312+
data = column_to_string_view_array_init_heap(
313+
col.to_pylibcudf(mode="read")
314+
)
307315
else:
308316
data = col.data
309317
if col.mask is not None:
@@ -325,7 +333,9 @@ def _return_arr_from_dtype(dtype, size):
325333

326334
def _post_process_output_col(col, retty):
327335
if retty == _cudf_str_dtype:
328-
return strings_udf.column_from_udf_string_array(col)
336+
return Column.from_pylibcudf(
337+
strings_udf.column_from_udf_string_array(col)
338+
)
329339
return as_column(col, retty)
330340

331341

@@ -365,7 +375,7 @@ def set_malloc_heap_size(size=None):
365375
_heap_size = size
366376

367377

368-
def column_to_string_view_array_init_heap(col):
378+
def column_to_string_view_array_init_heap(col: plc.Column) -> Buffer:
369379
# lazily allocate heap only when a string needs to be returned
370380
return strings_udf.column_to_string_view_array(col)
371381

python/cudf/cudf/tests/test_string_udfs.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
22

33
import numba
44
import numpy as np
@@ -11,6 +11,7 @@
1111
import rmm
1212

1313
import cudf
14+
from cudf._lib.column import Column
1415
from cudf._lib.strings_udf import (
1516
column_from_udf_string_array,
1617
column_to_string_view_array,
@@ -87,14 +88,16 @@ def run_udf_test(data, func, dtype):
8788
)
8889

8990
cudf_column = cudf.core.column.as_column(data)
90-
str_views = column_to_string_view_array(cudf_column)
91+
str_views = column_to_string_view_array(
92+
cudf_column.to_pylibcudf(mode="read")
93+
)
9194
sv_kernel, udf_str_kernel = get_kernels(func, dtype, len(data))
9295

9396
expect = pd.Series(data).apply(func)
9497
with _CUDFNumbaConfig():
9598
sv_kernel.forall(len(data))(str_views, output)
9699
if dtype == "str":
97-
result = column_from_udf_string_array(output)
100+
result = Column.from_pylibcudf(column_from_udf_string_array(output))
98101
else:
99102
result = output
100103

@@ -103,7 +106,7 @@ def run_udf_test(data, func, dtype):
103106
with _CUDFNumbaConfig():
104107
udf_str_kernel.forall(len(data))(str_views, output)
105108
if dtype == "str":
106-
result = column_from_udf_string_array(output)
109+
result = Column.from_pylibcudf(column_from_udf_string_array(output))
107110
else:
108111
result = output
109112

0 commit comments

Comments
 (0)