rapidsai · Umang-projects · Jan 18, 2026 · Jan 18, 2026 · Jan 19, 2026 · Jan 19, 2026
@@ -2785,6 +2785,40 @@ def rsplit(
 
         return self._return_or_inplace(result_table, expand=expand)
 
+    def split_part(
+        self, delimiter: str | None = None, index: int = 0
+    ) -> Series | Index:
+        """
+        Splits the string by delimiter and returns the token at the given index.
+
+        Parameters
+        ----------
+        delimiter : str, default None
+            The string to split on. If not specified, split on whitespace.
+        index : int, default 0
+            The index of the token to retrieve.
+
+        Returns
+        -------
+        Series or Index
+
+        Examples
+        --------
+        >>> import cudf
+        >>> s = cudf.Series(["a_b_c", "d_e", "f"])
+        >>> s.str.split_part(delimiter="_", index=1)
+        0       b
+        1       e
+        2    None
+        dtype: object
+        """
+
+        if delimiter is None:
+            delimiter = ""
+        return self._return_or_inplace(
+            self._column.split_part(delimiter, index)
+        )
+
     def partition(self, sep: str = " ", expand: bool = True) -> Series | Index:
         """
         Split the string at the first occurrence of sep.

@@ -1211,6 +1211,22 @@ def split(self, delimiter: plc.Scalar, maxsplit: int) -> dict[int, Self]:
     def rsplit(self, delimiter: plc.Scalar, maxsplit: int) -> dict[int, Self]:
         return self._split(delimiter, maxsplit, plc.strings.split.split.rsplit)
 
+    def split_part(self, delimiter: plc.Scalar, index: int) -> Self:
+        with self.access(mode="read", scope="internal"):
+            plc_column = plc.strings.split.split.split_part(
+                self.plc_column,
+                delimiter,
+                index,
+            )
+            return cast(
+                Self,
+                (
+                    type(self)
+                    .from_pylibcudf(plc_column)
+                    ._with_type_metadata(self.dtype)
+                ),
+            )
+
     def _partition(
         self,
         delimiter: plc.Scalar,

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 import json
@@ -1732,6 +1732,34 @@ def test_string_rsplit_re(n, expand):
     assert_eq(expect, got)
 
 
+@pytest.mark.parametrize(
+    "data, delimiter, index, expected",
+    [
+        (["a_b_c", "d_e", "f"], "_", 1, ["b", "e", None]),
+        (["a_b_c", "d_e", "f"], "_", 0, ["a", "d", "f"]),
+    ],
+)
+def test_split_part(data, delimiter, index, expected):
+    s = cudf.Series(data)
+    got = s.str.split_part(delimiter=delimiter, index=index)
+    expect = cudf.Series(expected)
+    assert_eq(got, expect)
+
+
+@pytest.mark.parametrize(
+    "data, index, expected",
+    [
+        (["a b c", "d  e", "f\tg", " h "], 0, ["a", "d", "f", "h"]),
+        (["a b c", "d  e", "f\tg", " h "], 1, ["b", "e", "g", None]),
+    ],
+)
+def test_split_part_whitespace(data, index, expected):
+    s = cudf.Series(data)
+    got = s.str.split_part(delimiter="", index=index)
+    expect = cudf.Series(expected)
+    assert_eq(got, expect)
+
+
 @pytest.mark.parametrize(
     "data",
     [

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
@@ -44,6 +44,13 @@ cdef extern from "cudf/strings/split/split.hpp" namespace \
         cuda_stream_view stream,
         device_memory_resource* mr) except +libcudf_exception_handler
 
+    # libcudf ``split_part`` from "cudf/strings/split/split.hpp": takes a
+    # strings column view, a string-scalar delimiter and a token index, plus
+    # the stream and memory resource, and returns a newly owned column.
+    cdef unique_ptr[column] split_part(
+        column_view strings,
+        string_scalar delimiter,
+        size_type index,
+        cuda_stream_view stream,
+        device_memory_resource* mr) except +libcudf_exception_handler
+
 
 cdef extern from "cudf/strings/split/split_re.hpp" namespace \
         "cudf::strings" nogil:

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from pylibcudf.column cimport Column
@@ -49,3 +49,8 @@ cpdef Column rsplit_record_re(
     Column input, RegexProgram prog, size_type maxsplit, Stream stream=*,
     DeviceMemoryResource mr=*,
 )
+
+# Declaration of the ``split_part`` wrapper: returns a Column built from
+# ``input``, a delimiter Scalar and a token index. ``stream`` and ``mr`` are
+# optional; their defaults are supplied by the implementation.
+cpdef Column split_part(
+    Column input, Scalar delimiter, size_type index, Stream stream=*,
+    DeviceMemoryResource mr=*,
+)
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 from rmm.pylibrmm.memory_resource import DeviceMemoryResource
@@ -65,3 +65,10 @@ def rsplit_record_re(
     stream: Stream | None = None,
     mr: DeviceMemoryResource | None = None,
 ) -> Column: ...
+# Type stub for ``split_part``: yields a Column holding, for each string in
+# ``input``, the token at position ``index`` after splitting on ``delimiter``.
+def split_part(
+    input: Column,
+    delimiter: Scalar,
+    index: int,
+    stream: Stream | None = None,
+    mr: DeviceMemoryResource | None = None,
+) -> Column: ...
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -405,3 +405,26 @@ cpdef Column rsplit_record_re(
         )
 
     return Column.from_libcudf(move(c_result), stream, mr)
+
+
+cpdef Column split_part(
+    Column input, Scalar delimiter, size_type index, Stream stream=None,
+    DeviceMemoryResource mr=None,
+):
+    """
+    Return the token at position ``index`` of each string after splitting
+    it on ``delimiter``.
+
+    Parameters
+    ----------
+    input : Column
+        Strings column to split.
+    delimiter : Scalar
+        String scalar to split on.
+    index : size_type
+        Position of the token to return.
+    stream : Stream | None
+        Stream handed to the libcudf call after ``_get_stream`` resolves it.
+    mr : DeviceMemoryResource | None
+        Memory resource handed to the libcudf call after
+        ``_get_memory_resource`` resolves it.
+
+    Returns
+    -------
+    Column
+        New column built from the libcudf result.
+    """
+    # The Scalar owns a generic libcudf scalar; view it as a string scalar,
+    # which is the type the libcudf signature takes.
+    cdef const string_scalar* delim_ptr = <const string_scalar*>(
+        delimiter.c_obj.get()
+    )
+    cdef unique_ptr[column] c_tokens
+    stream = _get_stream(stream)
+    mr = _get_memory_resource(mr)
+
+    with nogil:
+        c_tokens = cpp_split.split_part(
+            input.view(),
+            dereference(delim_ptr),
+            index,
+            stream.view(),
+            mr.get_mr()
+        )
+
+    return Column.from_libcudf(move(c_tokens), stream, mr)
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 import pyarrow as pa
@@ -130,3 +130,34 @@ def test_rsplit_record_re(data_col, re_delimiter):
     )
     expect = pc.split_pattern_regex(pa_array, re_delimiter)
     assert_column_eq(expect, got)
+
+
+def test_split_part(data_col, delimiter):
+    # Using existing fixtures (data_col has ["a_b_c", "d-e-f", None], delimiter is "_")
+    _, plc_column = data_col
+    _, plc_delimiter = delimiter
+
+    # Case 1: Index 0
+    got = plc.strings.split.split.split_part(plc_column, plc_delimiter, 0)
+    expect = pa.array(["a", "d-e-f", None])
+    assert_column_eq(expect, got)
+
+    # Case 2: Index 1
+    got = plc.strings.split.split.split_part(plc_column, plc_delimiter, 1)
+    # "d-e-f" has no delimiter, so index 1 is null
+    expect = pa.array(["b", None, None])
+    assert_column_eq(expect, got)
+
+
+def test_split_part_whitespace():
+    # Standalone test for whitespace because fixtures use "_"
+    data = pa.array(["a b", "c  d", "e\\tf", None])
+    plc_column = plc.Column.from_arrow(data)
+
+    # Empty delimiter for whitespace split
+    plc_delimiter = plc.Scalar.from_arrow(pa.scalar(""))
+
+    # Index 1
+    got = plc.strings.split.split.split_part(plc_column, plc_delimiter, 1)
+    expect = pa.array(["b", "d", "f", None])
+    assert_column_eq(expect, got)