39
39
from polars .polars import _expr_nodes as pl_expr
40
40
41
41
from cudf_polars .typing import Schema , Slice as Zlice
42
+ from cudf_polars .utils .config import ConfigOptions
42
43
43
44
44
45
__all__ = [
@@ -284,7 +285,7 @@ class Scan(IR):
284
285
"""Reader-specific options, as dictionary."""
285
286
cloud_options : dict [str , Any ] | None
286
287
"""Cloud-related authentication options, currently ignored."""
287
- config_options : dict [ str , Any ]
288
+ config_options : ConfigOptions
288
289
"""GPU-specific configuration options"""
289
290
paths : list [str ]
290
291
"""List of paths to read from."""
@@ -308,7 +309,7 @@ def __init__(
308
309
typ : str ,
309
310
reader_options : dict [str , Any ],
310
311
cloud_options : dict [str , Any ] | None ,
311
- config_options : dict [ str , Any ] ,
312
+ config_options : ConfigOptions ,
312
313
paths : list [str ],
313
314
with_columns : list [str ] | None ,
314
315
skip_rows : int ,
@@ -413,7 +414,7 @@ def get_hashable(self) -> Hashable:
413
414
self .typ ,
414
415
json .dumps (self .reader_options ),
415
416
json .dumps (self .cloud_options ),
416
- json . dumps ( self .config_options ) ,
417
+ self .config_options ,
417
418
tuple (self .paths ),
418
419
tuple (self .with_columns ) if self .with_columns is not None else None ,
419
420
self .skip_rows ,
@@ -428,7 +429,7 @@ def do_evaluate(
428
429
schema : Schema ,
429
430
typ : str ,
430
431
reader_options : dict [str , Any ],
431
- config_options : dict [ str , Any ] ,
432
+ config_options : ConfigOptions ,
432
433
paths : list [str ],
433
434
with_columns : list [str ] | None ,
434
435
skip_rows : int ,
@@ -516,8 +517,7 @@ def do_evaluate(
516
517
colnames [0 ],
517
518
)
518
519
elif typ == "parquet" :
519
- parquet_options = config_options .get ("parquet_options" , {})
520
- if parquet_options .get ("chunked" , True ):
520
+ if config_options .get ("parquet_options.chunked" , default = True ):
521
521
options = plc .io .parquet .ParquetReaderOptions .builder (
522
522
plc .io .SourceInfo (paths )
523
523
).build ()
@@ -534,11 +534,13 @@ def do_evaluate(
534
534
options .set_columns (with_columns )
535
535
reader = plc .io .parquet .ChunkedParquetReader (
536
536
options ,
537
- chunk_read_limit = parquet_options .get (
538
- "chunk_read_limit" , cls .PARQUET_DEFAULT_CHUNK_SIZE
537
+ chunk_read_limit = config_options .get (
538
+ "parquet_options.chunk_read_limit" ,
539
+ default = cls .PARQUET_DEFAULT_CHUNK_SIZE ,
539
540
),
540
- pass_read_limit = parquet_options .get (
541
- "pass_read_limit" , cls .PARQUET_DEFAULT_PASS_LIMIT
541
+ pass_read_limit = config_options .get (
542
+ "parquet_options.pass_read_limit" ,
543
+ default = cls .PARQUET_DEFAULT_PASS_LIMIT ,
542
544
),
543
545
)
544
546
chk = reader .read_chunk ()
@@ -702,15 +704,15 @@ class DataFrameScan(IR):
702
704
"""Polars LazyFrame object."""
703
705
projection : tuple [str , ...] | None
704
706
"""List of columns to project out."""
705
- config_options : dict [ str , Any ]
707
+ config_options : ConfigOptions
706
708
"""GPU-specific configuration options"""
707
709
708
710
def __init__ (
709
711
self ,
710
712
schema : Schema ,
711
713
df : Any ,
712
714
projection : Sequence [str ] | None ,
713
- config_options : dict [ str , Any ] ,
715
+ config_options : ConfigOptions ,
714
716
):
715
717
self .schema = schema
716
718
self .df = df
@@ -736,7 +738,7 @@ def get_hashable(self) -> Hashable:
736
738
schema_hash ,
737
739
id (self .df ),
738
740
self .projection ,
739
- json . dumps ( self .config_options ) ,
741
+ self .config_options ,
740
742
)
741
743
742
744
@classmethod
@@ -876,7 +878,7 @@ def __init__(self, polars_groupby_options: Any):
876
878
"""Preserve order in groupby."""
877
879
options : GroupbyOptions
878
880
"""Arbitrary options."""
879
- config_options : dict [ str , Any ]
881
+ config_options : ConfigOptions
880
882
"""GPU-specific configuration options"""
881
883
882
884
def __init__ (
@@ -886,7 +888,7 @@ def __init__(
886
888
agg_requests : Sequence [expr .NamedExpr ],
887
889
maintain_order : bool , # noqa: FBT001
888
890
options : Any ,
889
- config_options : dict [ str , Any ] ,
891
+ config_options : ConfigOptions ,
890
892
df : IR ,
891
893
):
892
894
self .schema = schema
@@ -912,18 +914,6 @@ def __init__(
912
914
self .AggInfos (self .agg_requests ),
913
915
)
914
916
915
- def get_hashable (self ) -> Hashable :
916
- """Hashable representation of the node."""
917
- return (
918
- type (self ),
919
- tuple (self .schema .items ()),
920
- self .keys ,
921
- self .maintain_order ,
922
- self .options ,
923
- json .dumps (self .config_options ),
924
- self .children ,
925
- )
926
-
927
917
@staticmethod
928
918
def check_agg (agg : expr .Expr ) -> int :
929
919
"""
0 commit comments