Skip to content

Commit cb77046

Browse files
authored
Bump Polars version to <1.18 (#17632)
This PR upgrades the Polars version to 1.17. It xfails some Polars tests due to known issues and adds the `maintain_order` parameter to joins (not implemented yet: regular joins reject any value other than "none" with `NotImplementedError`). Notable change: the Polars IR version bound accepted by the translator is raised from (4, 0) to (4, 3). Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) URL: #17632
1 parent 76f1c8b commit cb77046

File tree

9 files changed

+76
-16
lines changed

9 files changed

+76
-16
lines changed

conda/environments/all_cuda-118_arch-x86_64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ dependencies:
6666
- pandas
6767
- pandas>=2.0,<2.2.4dev0
6868
- pandoc
69-
- polars>=1.11,<1.15
69+
- polars>=1.11,<1.18
7070
- pre-commit
7171
- ptxcompiler
7272
- pyarrow>=14.0.0,<19.0.0a0

conda/environments/all_cuda-125_arch-x86_64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ dependencies:
6464
- pandas
6565
- pandas>=2.0,<2.2.4dev0
6666
- pandoc
67-
- polars>=1.11,<1.15
67+
- polars>=1.11,<1.18
6868
- pre-commit
6969
- pyarrow>=14.0.0,<19.0.0a0
7070
- pydata-sphinx-theme!=0.14.2

conda/recipes/cudf-polars/meta.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
22

33
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
44
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -43,7 +43,7 @@ requirements:
4343
run:
4444
- python
4545
- pylibcudf ={{ version }}
46-
- polars >=1.11,<1.15
46+
- polars >=1.11,<1.18
4747
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
4848

4949
test:

dependencies.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,7 @@ dependencies:
747747
common:
748748
- output_types: [conda, requirements, pyproject]
749749
packages:
750-
- polars>=1.11,<1.15
750+
- polars>=1.11,<1.18
751751
run_cudf_polars_experimental:
752752
common:
753753
- output_types: [conda, requirements, pyproject]

python/cudf_polars/cudf_polars/dsl/ir.py

+36-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
22
# SPDX-License-Identifier: Apache-2.0
33
"""
44
DSL nodes for the LogicalPlan of polars.
@@ -34,9 +34,11 @@
3434
from cudf_polars.utils.versions import POLARS_VERSION_GT_112
3535

3636
if TYPE_CHECKING:
37-
from collections.abc import Callable, Hashable, MutableMapping, Sequence
37+
from collections.abc import Callable, Hashable, Iterable, MutableMapping, Sequence
3838
from typing import Literal
3939

40+
from polars.polars import _expr_nodes as pl_expr
41+
4042
from cudf_polars.typing import Schema
4143

4244

@@ -1019,7 +1021,27 @@ class ConditionalJoin(IR):
10191021
__slots__ = ("ast_predicate", "options", "predicate")
10201022
_non_child = ("schema", "predicate", "options")
10211023
predicate: expr.Expr
1022-
options: tuple
1024+
"""Expression predicate to join on"""
1025+
options: tuple[
1026+
tuple[
1027+
str,
1028+
pl_expr.Operator | Iterable[pl_expr.Operator],
1029+
],
1030+
bool,
1031+
tuple[int, int] | None,
1032+
str,
1033+
bool,
1034+
Literal["none", "left", "right", "left_right", "right_left"],
1035+
]
1036+
"""
1037+
tuple of options:
1038+
- predicates: tuple of ir join type (eg. ie_join) and (In)Equality conditions
1039+
- join_nulls: do nulls compare equal?
1040+
- slice: optional slice to perform after joining.
1041+
- suffix: string suffix for right columns if names match
1042+
- coalesce: should key columns be coalesced (only makes sense for outer joins)
1043+
- maintain_order: which DataFrame row order to preserve, if any
1044+
"""
10231045

10241046
def __init__(
10251047
self, schema: Schema, predicate: expr.Expr, options: tuple, left: IR, right: IR
@@ -1029,22 +1051,24 @@ def __init__(
10291051
self.options = options
10301052
self.children = (left, right)
10311053
self.ast_predicate = to_ast(predicate)
1032-
_, join_nulls, zlice, suffix, coalesce = self.options
1054+
_, join_nulls, zlice, suffix, coalesce, maintain_order = self.options
10331055
# Preconditions from polars
10341056
assert not join_nulls
10351057
assert not coalesce
1058+
assert maintain_order == "none"
10361059
if self.ast_predicate is None:
10371060
raise NotImplementedError(
10381061
f"Conditional join with predicate {predicate}"
10391062
) # pragma: no cover; polars never delivers expressions we can't handle
1040-
self._non_child_args = (self.ast_predicate, zlice, suffix)
1063+
self._non_child_args = (self.ast_predicate, zlice, suffix, maintain_order)
10411064

10421065
@classmethod
10431066
def do_evaluate(
10441067
cls,
10451068
predicate: plc.expressions.Expression,
10461069
zlice: tuple[int, int] | None,
10471070
suffix: str,
1071+
maintain_order: Literal["none", "left", "right", "left_right", "right_left"],
10481072
left: DataFrame,
10491073
right: DataFrame,
10501074
) -> DataFrame:
@@ -1088,6 +1112,7 @@ class Join(IR):
10881112
tuple[int, int] | None,
10891113
str,
10901114
bool,
1115+
Literal["none", "left", "right", "left_right", "right_left"],
10911116
]
10921117
"""
10931118
tuple of options:
@@ -1096,6 +1121,7 @@ class Join(IR):
10961121
- slice: optional slice to perform after joining.
10971122
- suffix: string suffix for right columns if names match
10981123
- coalesce: should key columns be coalesced (only makes sense for outer joins)
1124+
- maintain_order: which DataFrame row order to preserve, if any
10991125
"""
11001126

11011127
def __init__(
@@ -1113,6 +1139,9 @@ def __init__(
11131139
self.options = options
11141140
self.children = (left, right)
11151141
self._non_child_args = (self.left_on, self.right_on, self.options)
1142+
# TODO: Implement maintain_order
1143+
if options[5] != "none":
1144+
raise NotImplementedError("maintain_order not implemented yet")
11161145
if any(
11171146
isinstance(e.value, expr.Literal)
11181147
for e in itertools.chain(self.left_on, self.right_on)
@@ -1222,12 +1251,13 @@ def do_evaluate(
12221251
tuple[int, int] | None,
12231252
str,
12241253
bool,
1254+
Literal["none", "left", "right", "left_right", "right_left"],
12251255
],
12261256
left: DataFrame,
12271257
right: DataFrame,
12281258
) -> DataFrame:
12291259
"""Evaluate and return a dataframe."""
1230-
how, join_nulls, zlice, suffix, coalesce = options
1260+
how, join_nulls, zlice, suffix, coalesce, _ = options
12311261
if how == "cross":
12321262
# Separate implementation, since cross_join returns the
12331263
# result, not the gather maps

python/cudf_polars/cudf_polars/dsl/translate.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
22
# SPDX-License-Identifier: Apache-2.0
33

44
"""Translate polars IR representation to ours."""
@@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR:
8484
# IR is versioned with major.minor, minor is bumped for backwards
8585
# compatible changes (e.g. adding new nodes), major is bumped for
8686
# incompatible changes (e.g. renaming nodes).
87-
if (version := self.visitor.version()) >= (4, 0):
87+
if (version := self.visitor.version()) >= (4, 3):
8888
e = NotImplementedError(
8989
f"No support for polars IR {version=}"
9090
) # pragma: no cover; no such version for now.

python/cudf_polars/cudf_polars/testing/plugin.py

+21
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ def pytest_configure(config: pytest.Config) -> None:
123123
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR",
124124
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR",
125125
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR",
126+
"tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR",
127+
"tests/unit/io/test_write.py::test_write_async[<lambda>-write_csv]": "Need to add include_file_path to IR",
128+
"tests/unit/io/test_write.py::test_write_async[read_parquet-<lambda>]": "Need to add include_file_path to IR",
129+
"tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>0]": "Need to add include_file_path to IR",
130+
"tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>2]": "Need to add include_file_path to IR",
126131
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed",
127132
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed",
128133
"tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly",
@@ -140,6 +145,22 @@ def pytest_configure(config: pytest.Config) -> None:
140145
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero",
141146
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero",
142147
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero",
148+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
149+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
150+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
151+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
152+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
153+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
154+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
155+
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
156+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
157+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
158+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
159+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
160+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
161+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
162+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
163+
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
143164
"tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values",
144165
"tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852",
145166
"tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype",

python/cudf_polars/pyproject.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
22

33
[build-system]
44
build-backend = "rapids_build_backend.build"
@@ -19,7 +19,7 @@ authors = [
1919
license = { text = "Apache 2.0" }
2020
requires-python = ">=3.10"
2121
dependencies = [
22-
"polars>=1.11,<1.15",
22+
"polars>=1.11,<1.18",
2323
"pylibcudf==25.2.*,>=0.0.0a0",
2424
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
2525
classifiers = [

python/cudf_polars/tests/test_join.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
22
# SPDX-License-Identifier: Apache-2.0
33
from __future__ import annotations
44

@@ -53,6 +53,15 @@ def right():
5353
)
5454

5555

56+
@pytest.mark.parametrize(
57+
"maintain_order", ["left", "left_right", "right_left", "right"]
58+
)
59+
def test_join_maintain_order_param_unsupported(left, right, maintain_order):
60+
q = left.join(right, on=pl.col("a"), how="inner", maintain_order=maintain_order)
61+
62+
assert_ir_translation_raises(q, NotImplementedError)
63+
64+
5665
@pytest.mark.parametrize(
5766
"join_expr",
5867
[

0 commit comments

Comments
 (0)