Skip to content

Commit da72cf6

Browse files
authored Dec 2, 2024
Remove cudf._lib.filling in favor of inlining pylibcudf (#17459)
Contributes to #17317.

Authors:
- Matthew Roeschke (https://github.com/mroeschke)
- Lawrence Mitchell (https://github.com/wence-)

Approvers:
- Lawrence Mitchell (https://github.com/wence-)

URL: #17459
1 parent 852338e commit da72cf6

File tree

7 files changed

+51
-87
lines changed

7 files changed

+51
-87
lines changed
 

‎python/cudf/cudf/_lib/CMakeLists.txt

-1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ set(cython_sources
1818
column.pyx
1919
copying.pyx
2020
csv.pyx
21-
filling.pyx
2221
groupby.pyx
2322
interop.pyx
2423
merge.pyx

‎python/cudf/cudf/_lib/__init__.py

-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
binaryop,
66
copying,
77
csv,
8-
filling,
98
groupby,
109
interop,
1110
merge,

‎python/cudf/cudf/_lib/filling.pyx

-57
This file was deleted.

‎python/cudf/cudf/core/column/categorical.py

+1-6
Original file line number | Diff line number | Diff line change
@@ -668,13 +668,8 @@ def _fill(
668668
return self if inplace else self.copy()
669669

670670
fill_code = self._encode(fill_value)
671-
fill_scalar = cudf._lib.scalar.as_device_scalar(
672-
fill_code, self.codes.dtype
673-
)
674-
675671
result = self if inplace else self.copy()
676-
677-
libcudf.filling.fill_in_place(result.codes, begin, end, fill_scalar)
672+
result.codes._fill(fill_code, begin, end, inplace=True)
678673
return result
679674

680675
def slice(self, start: int, stop: int, stride: int | None = None) -> Self:

‎python/cudf/cudf/core/column/column.py

+32-15
Original file line number | Diff line number | Diff line change
@@ -401,14 +401,19 @@ def _fill(
401401
# the scalar is None when calling `is_valid`.
402402
slr = cudf.Scalar(fill_value, dtype=self.dtype)
403403

404-
if not inplace:
405-
return libcudf.filling.fill(self, begin, end, slr.device_value)
406-
407-
if is_string_dtype(self.dtype):
408-
return self._mimic_inplace(
409-
libcudf.filling.fill(self, begin, end, slr.device_value),
410-
inplace=True,
411-
)
404+
if not inplace or is_string_dtype(self.dtype):
405+
with acquire_spill_lock():
406+
result = type(self).from_pylibcudf(
407+
plc.filling.fill(
408+
self.to_pylibcudf(mode="read"),
409+
begin,
410+
end,
411+
slr.device_value.c_value,
412+
)
413+
)
414+
if is_string_dtype(self.dtype):
415+
return self._mimic_inplace(result, inplace=True)
416+
return result # type: ignore[return-value]
412417

413418
if not slr.is_valid() and not self.nullable:
414419
mask = as_buffer(
@@ -418,8 +423,13 @@ def _fill(
418423
)
419424
self.set_base_mask(mask)
420425

421-
libcudf.filling.fill_in_place(self, begin, end, slr.device_value)
422-
426+
with acquire_spill_lock():
427+
plc.filling.fill_in_place(
428+
self.to_pylibcudf(mode="write"),
429+
begin,
430+
end,
431+
slr.device_value.c_value,
432+
)
423433
return self
424434

425435
def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase:
@@ -1813,11 +1823,18 @@ def as_column(
18131823
* range objects
18141824
"""
18151825
if isinstance(arbitrary, (range, pd.RangeIndex, cudf.RangeIndex)):
1816-
column = libcudf.filling.sequence(
1817-
len(arbitrary),
1818-
as_device_scalar(arbitrary.start, dtype=cudf.dtype("int64")),
1819-
as_device_scalar(arbitrary.step, dtype=cudf.dtype("int64")),
1820-
)
1826+
with acquire_spill_lock():
1827+
column = Column.from_pylibcudf(
1828+
plc.filling.sequence(
1829+
len(arbitrary),
1830+
as_device_scalar(
1831+
arbitrary.start, dtype=np.dtype(np.int64)
1832+
).c_value,
1833+
as_device_scalar(
1834+
arbitrary.step, dtype=np.dtype(np.int64)
1835+
).c_value,
1836+
)
1837+
)
18211838
if cudf.get_option("default_integer_bitwidth") and dtype is None:
18221839
dtype = cudf.dtype(
18231840
f'i{cudf.get_option("default_integer_bitwidth")//8}'

‎python/cudf/cudf/core/frame.py

+10-1
Original file line number | Diff line number | Diff line change
@@ -1901,7 +1901,16 @@ def _repeat(
19011901
if not is_scalar(repeats):
19021902
repeats = as_column(repeats)
19031903

1904-
return libcudf.filling.repeat(columns, repeats)
1904+
with acquire_spill_lock():
1905+
plc_table = plc.Table(
1906+
[col.to_pylibcudf(mode="read") for col in columns]
1907+
)
1908+
if isinstance(repeats, ColumnBase):
1909+
repeats = repeats.to_pylibcudf(mode="read")
1910+
return [
1911+
libcudf.column.Column.from_pylibcudf(col)
1912+
for col in plc.filling.repeat(plc_table, repeats).columns()
1913+
]
19051914

19061915
@_performance_tracking
19071916
@_warn_no_dask_cudf

‎python/cudf/cudf/core/index.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
import cudf
2222
from cudf import _lib as libcudf
23-
from cudf._lib.filling import sequence
2423
from cudf._lib.types import size_type_dtype
2524
from cudf.api.extensions import no_default
2625
from cudf.api.types import (
@@ -3402,11 +3401,14 @@ def interval_range(
34023401
start = start.astype(common_dtype)
34033402
freq = freq.astype(common_dtype)
34043403

3405-
bin_edges = sequence(
3406-
size=periods + 1,
3407-
init=start.device_value,
3408-
step=freq.device_value,
3409-
)
3404+
with acquire_spill_lock():
3405+
bin_edges = libcudf.column.Column.from_pylibcudf(
3406+
plc.filling.sequence(
3407+
size=periods + 1,
3408+
init=start.device_value.c_value,
3409+
step=freq.device_value.c_value,
3410+
)
3411+
)
34103412
return IntervalIndex.from_breaks(bin_edges, closed=closed, name=name)
34113413

34123414

0 commit comments

Comments
 (0)
Please sign in to comment.