Skip to content

Commit

Permalink
Change example to compute aggregation on each group
Browse files Browse the repository at this point in the history
Signed-off-by: Nghia Truong <[email protected]>
  • Loading branch information
ttnghia committed Nov 5, 2024
1 parent 04e2bda commit 5f7ab2b
Showing 1 changed file with 33 additions and 21 deletions.
54 changes: 33 additions & 21 deletions cpp/tests/groupby/host_udf_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@

#include <rmm/exec_policy.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>

using namespace cudf::test::iterators;

struct test : public cudf::test::BaseFixture {};

// For each group: compute (group_idx + 1)* values^2 * 2
std::unique_ptr<cudf::column> double_sqr(cudf::column_view const& values,
cudf::device_span<cudf::size_type const> group_offsets,
cudf::device_span<cudf::size_type const> group_labels,
Expand All @@ -43,16 +44,22 @@ std::unique_ptr<cudf::column> double_sqr(cudf::column_view const& values,
{
auto output = cudf::make_numeric_column(
cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream);
thrust::transform(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(values.size()),
output->mutable_view().begin<int>(),
[values = values.begin<int>()] __device__(int idx) -> int {
return 2 * values[idx] * values[idx];
});
thrust::for_each(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_groups),
[output = output->mutable_view().begin<int>(),
values = values.begin<int>(),
group_offsets] __device__(int idx) -> int {
auto start = group_offsets[idx];
auto end = group_offsets[idx + 1];
for (int i = start; i < end; ++i) {
output[i] = (idx + 1) * 2 * values[i] * values[i];
}
});
return output;
}

// For each group: compute (group_idx + 1)* values^2 * 3
std::unique_ptr<cudf::column> triple_sqr(cudf::column_view const& values,
cudf::device_span<cudf::size_type const> group_offsets,
cudf::device_span<cudf::size_type const> group_labels,
Expand All @@ -62,20 +69,25 @@ std::unique_ptr<cudf::column> triple_sqr(cudf::column_view const& values,
{
auto output = cudf::make_numeric_column(
cudf::data_type{cudf::type_id::INT32}, values.size(), cudf::mask_state::UNALLOCATED, stream);
thrust::transform(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(values.size()),
output->mutable_view().begin<int>(),
[values = values.begin<int>()] __device__(int idx) -> int {
return 3 * values[idx] * values[idx];
});
thrust::for_each(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_groups),
[output = output->mutable_view().begin<int>(),
values = values.begin<int>(),
group_offsets] __device__(int idx) -> int {
auto start = group_offsets[idx];
auto end = group_offsets[idx + 1];
for (int i = start; i < end; ++i) {
output[i] = (idx + 1) * 3 * values[i] * values[i];
}
});
return output;
}

TEST_F(test, double_sqr)
{
cudf::test::fixed_width_column_wrapper<int> keys{1, 1, 1, 1, 1};
cudf::test::fixed_width_column_wrapper<int> vals{0, 1, 2, 3, 4};
cudf::test::fixed_width_column_wrapper<int> keys{1, 2, 3, 1, 2, 3};
cudf::test::fixed_width_column_wrapper<int> vals{0, 1, 2, 3, 4, 5};

auto agg = cudf::make_host_udf_aggregation<cudf::groupby_aggregation>(double_sqr);
std::vector<cudf::groupby::aggregation_request> requests;
Expand All @@ -87,14 +99,14 @@ TEST_F(test, double_sqr)

auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream());

// Got output: 0,2,8,18,32
// Got output: 0,18,4,64,24,150
cudf::test::print(*result.second[0].results[0]);
}

TEST_F(test, triple_sqr)
{
cudf::test::fixed_width_column_wrapper<int> keys{1, 1, 1, 1, 1};
cudf::test::fixed_width_column_wrapper<int> vals{0, 1, 2, 3, 4};
cudf::test::fixed_width_column_wrapper<int> keys{1, 2, 3, 1, 2, 3};
cudf::test::fixed_width_column_wrapper<int> vals{0, 1, 2, 3, 4, 5};

auto agg = cudf::make_host_udf_aggregation<cudf::groupby_aggregation>(triple_sqr);
std::vector<cudf::groupby::aggregation_request> requests;
Expand All @@ -106,6 +118,6 @@ TEST_F(test, triple_sqr)

auto result = gb_obj.aggregate(requests, cudf::test::get_default_stream());

// Got output: 0,3,12,27,48
// Got output: 0,27,6,96,36,225
cudf::test::print(*result.second[0].results[0]);
}

0 comments on commit 5f7ab2b

Please sign in to comment.