Skip to content

Commit 958d312

Browse files
committed
Add more aggregation methods in pylibcudf
1 parent dc2a75c commit 958d312

File tree

2 files changed

+205
-4
lines changed

2 files changed

+205
-4
lines changed

python/pylibcudf/pylibcudf/aggregation.pyx

+164-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
22

33
from cython.operator cimport dereference
44
from libcpp.cast cimport dynamic_cast
@@ -20,9 +20,16 @@ from pylibcudf.libcudf.aggregation cimport (
2020
make_count_aggregation,
2121
make_covariance_aggregation,
2222
make_ewma_aggregation,
23+
make_histogram_aggregation,
24+
make_m2_aggregation,
2325
make_max_aggregation,
2426
make_mean_aggregation,
2527
make_median_aggregation,
28+
make_merge_m2_aggregation,
29+
make_merge_histogram_aggregation,
30+
make_merge_lists_aggregation,
31+
make_merge_sets_aggregation,
32+
make_merge_tdigest_aggregation,
2633
make_min_aggregation,
2734
make_nth_element_aggregation,
2835
make_nunique_aggregation,
@@ -32,6 +39,7 @@ from pylibcudf.libcudf.aggregation cimport (
3239
make_std_aggregation,
3340
make_sum_aggregation,
3441
make_sum_of_squares_aggregation,
42+
make_tdigest_aggregation,
3543
make_udf_aggregation,
3644
make_variance_aggregation,
3745
rank_method,
@@ -82,9 +90,16 @@ __all__ = [
8290
"count",
8391
"covariance",
8492
"ewma",
93+
"histogram",
94+
"m2",
8595
"max",
8696
"mean",
8797
"median",
98+
"merge_histogram",
99+
"merge_lists",
100+
"merge_m2",
101+
"merge_sets",
102+
"merge_tdigest",
88103
"min",
89104
"nth_element",
90105
"nunique",
@@ -94,6 +109,7 @@ __all__ = [
94109
"std",
95110
"sum",
96111
"sum_of_squares",
112+
"tdigest",
97113
"udf",
98114
"variance",
99115
]
@@ -639,3 +655,150 @@ cpdef Aggregation rank(
639655
)
640656
)
641657
)
658+
659+
660+
cpdef Aggregation histogram():
    """Build an aggregation of kind histogram.

    This wraps the libcudf factory
    :cpp:func:`make_histogram_aggregation`; refer to it for the
    semantics of the aggregation itself.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf histogram aggregation.
    """
    return Aggregation.from_libcudf(
        move(
            make_histogram_aggregation[aggregation]()
        )
    )
673+
674+
675+
cpdef Aggregation m2():
    """Build an M2 aggregation.

    This wraps the libcudf factory :cpp:func:`make_m2_aggregation`;
    refer to it for the semantics of the aggregation itself.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf M2 aggregation.
    """
    return Aggregation.from_libcudf(
        move(
            make_m2_aggregation[aggregation]()
        )
    )
688+
689+
690+
cpdef Aggregation merge_m2():
    """Build a merge M2 aggregation.

    This wraps the libcudf factory
    :cpp:func:`make_merge_m2_aggregation`; refer to it for the
    semantics of the aggregation itself.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf merge M2 aggregation.
    """
    return Aggregation.from_libcudf(
        move(
            make_merge_m2_aggregation[aggregation]()
        )
    )
703+
704+
705+
cpdef Aggregation merge_histogram():
    """Build a merge histogram aggregation.

    This wraps the libcudf factory
    :cpp:func:`make_merge_histogram_aggregation`; refer to it for the
    semantics of the aggregation itself.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf merge histogram aggregation.
    """
    return Aggregation.from_libcudf(
        move(
            make_merge_histogram_aggregation[aggregation]()
        )
    )
718+
719+
720+
cpdef Aggregation merge_lists():
    """Build a merge lists aggregation.

    This wraps the libcudf factory
    :cpp:func:`make_merge_lists_aggregation`; refer to it for the
    semantics of the aggregation itself.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf merge lists aggregation.
    """
    return Aggregation.from_libcudf(
        move(
            make_merge_lists_aggregation[aggregation]()
        )
    )
733+
734+
735+
cpdef Aggregation merge_sets(
    null_equality nulls_equal = null_equality.EQUAL,
    nan_equality nans_equal = nan_equality.ALL_EQUAL,
):
    """Build a merge sets aggregation.

    This wraps the libcudf factory
    :cpp:func:`make_merge_sets_aggregation`, forwarding both arguments
    unchanged; refer to it for the semantics of the aggregation itself.

    Parameters
    ----------
    nulls_equal : null_equality, default EQUAL
        Controls whether nulls are treated as equal to one another.
    nans_equal : nan_equality, default ALL_EQUAL
        Controls whether NaNs are treated as equal to one another.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf merge sets aggregation.
    """
    return Aggregation.from_libcudf(
        move(make_merge_sets_aggregation[aggregation](nulls_equal, nans_equal))
    )
763+
764+
765+
cpdef Aggregation merge_tdigest(int max_centroids):
    """Build a merge TDIGEST aggregation.

    This wraps the libcudf factory
    :cpp:func:`make_merge_tdigest_aggregation`, forwarding
    ``max_centroids`` unchanged; refer to it for the semantics of the
    aggregation itself.

    Parameters
    ----------
    max_centroids : int
        Controls the compression level of the output tdigest data and
        the accuracy of subsequent queries on it.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf merge TDIGEST aggregation.
    """
    return Aggregation.from_libcudf(
        move(
            make_merge_tdigest_aggregation[aggregation](max_centroids)
        )
    )
784+
785+
786+
cpdef Aggregation tdigest(int max_centroids):
    """Build a TDIGEST aggregation.

    This wraps the libcudf factory :cpp:func:`make_tdigest_aggregation`,
    forwarding ``max_centroids`` unchanged; refer to it for the
    semantics of the aggregation itself.

    Parameters
    ----------
    max_centroids : int
        Controls the compression level of the output tdigest data and
        the accuracy of subsequent queries on it.

    Returns
    -------
    Aggregation
        A new object wrapping the libcudf TDIGEST aggregation.
    """
    return Aggregation.from_libcudf(
        move(
            make_tdigest_aggregation[aggregation](max_centroids)
        )
    )

python/pylibcudf/pylibcudf/libcudf/aggregation.pxd

+41-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
22
from libc.stddef cimport size_t
33
from libc.stdint cimport int32_t
44
from libcpp cimport bool
@@ -33,6 +33,7 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
3333
ALL
3434
SUM_OF_SQUARES
3535
MEAN
36+
M2
3637
VARIANCE
3738
STD
3839
MEDIAN
@@ -41,13 +42,25 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
4142
ARGMIN
4243
NUNIQUE
4344
NTH_ELEMENT
45+
ROW_NUMBER
46+
EWMA
4447
RANK
4548
COLLECT_LIST
4649
COLLECT_SET
50+
LEAD
51+
LAG
4752
PTX
4853
CUDA
49-
CORRELATION
54+
HOST_UDF
55+
MERGE_LISTS
56+
MERGE_SETS
57+
MERGE_M2
5058
COVARIANCE
59+
CORRELATION
60+
TDIGEST
61+
MERGE_TDIGEST
62+
HISTOGRAM
63+
MERGE_HISTOGRAM
5164

5265
cdef cppclass aggregation:
5366
Kind kind
@@ -104,7 +117,7 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
104117
cdef unique_ptr[T] make_max_aggregation[T]() except +libcudf_exception_handler
105118

106119
cdef unique_ptr[T] make_count_aggregation[T](
107-
null_policy
120+
null_policy null_handling
108121
) except +libcudf_exception_handler
109122

110123
cdef unique_ptr[T] make_any_aggregation[T]() except +libcudf_exception_handler
@@ -170,3 +183,28 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
170183
null_policy null_handling,
171184
null_order null_precedence,
172185
rank_percentage percentage) except +libcudf_exception_handler
186+
187+
# Factory declarations below are grouped as base/merge pairs. Each is
# templated on the aggregation type T and routes C++ exceptions through
# libcudf_exception_handler.

# Histogram factories (no arguments).
cdef unique_ptr[T] make_histogram_aggregation[T](
) except +libcudf_exception_handler

cdef unique_ptr[T] make_merge_histogram_aggregation[T](
) except +libcudf_exception_handler

# M2 factories (no arguments).
cdef unique_ptr[T] make_m2_aggregation[T](
) except +libcudf_exception_handler

cdef unique_ptr[T] make_merge_m2_aggregation[T](
) except +libcudf_exception_handler

# List/set merge factories; the set variant takes the null and NaN
# equality policies.
cdef unique_ptr[T] make_merge_lists_aggregation[T](
) except +libcudf_exception_handler

cdef unique_ptr[T] make_merge_sets_aggregation[T](
    null_equality nulls_equal,
    nan_equality nans_equal
) except +libcudf_exception_handler

# t-digest factories; both take max_centroids as a C int.
cdef unique_ptr[T] make_tdigest_aggregation[T](
    int max_centroids
) except +libcudf_exception_handler

cdef unique_ptr[T] make_merge_tdigest_aggregation[T](
    int max_centroids
) except +libcudf_exception_handler

0 commit comments

Comments
 (0)