From 62d72dff9363bf6a58154def9f99fdd4e8a9acc8 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 3 Jan 2025 21:05:50 -0800 Subject: [PATCH] Refactor distinct hash join to handle multiple probes with the same build table (#17609) This PR updates the distinct join implementation to allow the same build table to be reused for multiple probe operations. It also introduces several breaking changes, including removing the need for users to specify whether the input data contains nested columns. Additionally, the output order has been updated to align with the hash join behavior, with probe indices now appearing on the left and build indices on the right. The PR leverages the new conditional query API in the cuco hash set, enabling more efficient handling of nullable data. While this optimization improves performance, it is not currently reflected in benchmarks due to the absence of a dedicated test case for this scenario. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Jason Lowe (https://github.com/jlowe) - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17609 --- cpp/benchmarks/join/distinct_join.cu | 20 +- .../cudf/detail/distinct_hash_join.cuh | 112 ++++----- cpp/include/cudf/join.hpp | 35 +-- cpp/src/join/distinct_hash_join.cu | 238 ++++++++++-------- cpp/tests/join/distinct_join_tests.cpp | 59 +++-- java/src/main/native/src/TableJni.cpp | 32 +-- 6 files changed, 236 insertions(+), 260 deletions(-) diff --git a/cpp/benchmarks/join/distinct_join.cu b/cpp/benchmarks/join/distinct_join.cu index 3502cbcea2a..1085b03ac7b 100644 --- a/cpp/benchmarks/join/distinct_join.cu +++ b/cpp/benchmarks/join/distinct_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,13 +23,8 @@ void distinct_inner_join(nvbench::state& state, auto join = [](cudf::table_view const& probe_input, cudf::table_view const& build_input, cudf::null_equality compare_nulls) { - auto const has_nulls = - cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - auto hj_obj = cudf::distinct_hash_join{ - build_input, probe_input, has_nulls, compare_nulls}; - return hj_obj.inner_join(); + auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls}; + return hj_obj.inner_join(probe_input); }; BM_join(state, join); @@ -42,13 +37,8 @@ void distinct_left_join(nvbench::state& state, auto join = [](cudf::table_view const& probe_input, cudf::table_view const& build_input, cudf::null_equality compare_nulls) { - auto const has_nulls = - cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - auto hj_obj = cudf::distinct_hash_join{ - build_input, probe_input, has_nulls, compare_nulls}; - return hj_obj.left_join(); + auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls}; + return hj_obj.left_join(probe_input); }; BM_join(state, join); diff --git a/cpp/include/cudf/detail/distinct_hash_join.cuh b/cpp/include/cudf/detail/distinct_hash_join.cuh index 2acc10105cf..9a10163eb15 100644 --- a/cpp/include/cudf/detail/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/distinct_hash_join.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,19 +36,24 @@ using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; /** - * @brief An comparator adapter wrapping both self comparator and two table comparator + * @brief A custom comparator used for the build table insertion */ -template -struct comparator_adapter { - comparator_adapter(Equal const& d_equal) : _d_equal{d_equal} {} - - __device__ constexpr auto operator()( +struct always_not_equal { + __device__ constexpr bool operator()( cuco::pair const&, cuco::pair const&) const noexcept { // All build table keys are distinct thus `false` no matter what return false; } +}; + +/** + * @brief A comparator adapter wrapping the two table comparator + */ +template +struct comparator_adapter { + comparator_adapter(Equal const& d_equal) : _d_equal{d_equal} {} __device__ constexpr auto operator()( cuco::pair const& lhs, @@ -62,56 +67,14 @@ struct comparator_adapter { Equal _d_equal; }; -template -struct hasher_adapter { - hasher_adapter(Hasher const& d_hasher = {}) : _d_hasher{d_hasher} {} - - template - __device__ constexpr auto operator()(cuco::pair const& key) const noexcept - { - return _d_hasher(key.first); - } - - private: - Hasher _d_hasher; -}; - /** * @brief Distinct hash join that builds hash table in creation and probes results in subsequent * `*_join` member functions. * - * @tparam HasNested Flag indicating whether there are nested columns in build/probe table + * This class enables the distinct hash join scheme that builds hash table once, and probes as many + * times as needed (possibly in parallel). 
*/ -template -struct distinct_hash_join { - private: - /// Device row equal type - using d_equal_type = cudf::experimental::row::equality::strong_index_comparator_adapter< - cudf::experimental::row::equality::device_row_comparator>; - using hasher = hasher_adapter>; - using probing_scheme_type = cuco::linear_probing<1, hasher>; - using cuco_storage_type = cuco::storage<1>; - - /// Hash table type - using hash_table_type = cuco::static_set, - cuco::extent, - cuda::thread_scope_device, - comparator_adapter, - probing_scheme_type, - cudf::detail::cuco_allocator, - cuco_storage_type>; - - bool _has_nulls; ///< true if nulls are present in either build table or probe table - cudf::null_equality _nulls_equal; ///< whether to consider nulls as equal - cudf::table_view _build; ///< input table to build the hash map - cudf::table_view _probe; ///< input table to probe the hash map - std::shared_ptr - _preprocessed_build; ///< input table preprocssed for row operators - std::shared_ptr - _preprocessed_probe; ///< input table preprocssed for row operators - hash_table_type _hash_table; ///< hash table built on `_build` - +class distinct_hash_join { public: distinct_hash_join() = delete; ~distinct_hash_join() = default; @@ -120,21 +83,28 @@ struct distinct_hash_join { distinct_hash_join& operator=(distinct_hash_join const&) = delete; distinct_hash_join& operator=(distinct_hash_join&&) = delete; + /** + * @brief Hasher adapter used by distinct hash join + */ + struct hasher { + template + __device__ constexpr hash_value_type operator()( + cuco::pair const& key) const noexcept + { + return key.first; + } + }; + /** * @brief Constructor that internally builds the hash table based on the given `build` table. * * @throw cudf::logic_error if the number of columns in `build` table is 0. 
* * @param build The build table, from which the hash table is built - * @param probe The probe table - * @param has_nulls Flag to indicate if any nulls exist in the `build` table or - * any `probe` table that will be used later for join. * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. */ distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - bool has_nulls, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream); @@ -143,12 +113,36 @@ struct distinct_hash_join { */ std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; + inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::distinct_hash_join::left_join */ std::unique_ptr> left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; + + private: + using probing_scheme_type = cuco::linear_probing<1, hasher>; + using cuco_storage_type = cuco::storage<1>; + + /// Hash table type + using hash_table_type = cuco::static_set, + cuco::extent, + cuda::thread_scope_device, + always_not_equal, + probing_scheme_type, + cudf::detail::cuco_allocator, + cuco_storage_type>; + + bool _has_nested_columns; ///< True if nested columns are present in build and probe tables + cudf::null_equality _nulls_equal; ///< Whether to consider nulls as equal + cudf::table_view _build; ///< Input table to build the hash map + std::shared_ptr + _preprocessed_build; ///< Input table preprocessed for row operators + hash_table_type _hash_table; ///< Hash table built on `_build` }; } // namespace cudf::detail diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index afefd04d4fa..cc63565eee1 100644 --- 
a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,13 +34,6 @@ namespace CUDF_EXPORT cudf { -/** - * @brief Enum to indicate whether the distinct join table has nested columns or not - * - * @ingroup column_join - */ -enum class has_nested : bool { YES, NO }; - // forward declaration namespace hashing::detail { @@ -61,7 +54,6 @@ class hash_join; /** * @brief Forward declaration for our distinct hash join */ -template class distinct_hash_join; } // namespace detail @@ -469,20 +461,19 @@ class hash_join { rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: - const std::unique_ptr _impl; + std::unique_ptr _impl; }; /** * @brief Distinct hash join that builds hash table in creation and probes results in subsequent * `*_join` member functions * + * This class enables the distinct hash join scheme that builds hash table once, and probes as many + * times as needed (possibly in parallel). + * * @note Behavior is undefined if the build table contains duplicates. 
* @note All NaNs are considered as equal - * - * @tparam HasNested Flag indicating whether there are nested columns in build/probe table */ -// TODO: `HasNested` to be removed via dispatching -template class distinct_hash_join { public: distinct_hash_join() = delete; @@ -496,15 +487,10 @@ class distinct_hash_join { * @brief Constructs a distinct hash join object for subsequent probe calls * * @param build The build table that contains distinct elements - * @param probe The probe table, from which the keys are probed - * @param has_nulls Flag to indicate if there exists any nulls in the `build` table or - * any `probe` table that will be used later for join * @param compare_nulls Controls whether null join-key values should match or not * @param stream CUDA stream used for device memory operations and kernel launches */ distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls = nullable_join::YES, null_equality compare_nulls = null_equality::EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream()); @@ -512,16 +498,18 @@ class distinct_hash_join { * @brief Returns the row indices that can be used to construct the result of performing * an inner join between two tables. @see cudf::inner_join(). * + * @param probe The probe table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned indices' device memory. * - * @return A pair of columns [`build_indices`, `probe_indices`] that can be used to + * @return A pair of columns [`probe_indices`, `build_indices`] that can be used to * construct the result of performing an inner join between two tables * with `build` and `probe` as the join keys. 
*/ [[nodiscard]] std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream = cudf::get_default_stream(), + inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** @@ -532,19 +520,22 @@ class distinct_hash_join { * the row index of the matched row from the build table if there is a match. Otherwise, contains * `JoinNoneValue`. * + * @param probe The probe table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. + * * @return A `build_indices` column that can be used to construct the result of * performing a left join between two tables with `build` and `probe` as the join * keys. */ [[nodiscard]] std::unique_ptr> left_join( + cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: - using impl_type = typename cudf::detail::distinct_hash_join; ///< Implementation type + using impl_type = cudf::detail::distinct_hash_join; ///< Implementation type std::unique_ptr _impl; ///< Distinct hash join implementation }; diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index ce4d2067b82..d1a01ee76e4 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,28 +47,19 @@ namespace cudf { namespace detail { namespace { -template -auto prepare_device_equal( - std::shared_ptr build, - std::shared_ptr probe, - bool has_nulls, - cudf::null_equality compare_nulls) -{ - auto const two_table_equal = - cudf::experimental::row::equality::two_table_comparator(probe, build); - return comparator_adapter{two_table_equal.equal_to( - nullate::DYNAMIC{has_nulls}, compare_nulls)}; -} +bool constexpr has_nulls = true; ///< Always has nulls /** * @brief Device functor to create a pair of {hash_value, row_index} for a given row. - * - * @tparam Hasher The type of internal hasher to compute row hash. */ -template +template class build_keys_fn { + using hasher = + cudf::experimental::row::hash::device_row_hasher; + public: - CUDF_HOST_DEVICE build_keys_fn(Hasher const& hash) : _hash{hash} {} + CUDF_HOST_DEVICE constexpr build_keys_fn(hasher const& hash) : _hash{hash} {} __device__ __forceinline__ auto operator()(size_type i) const noexcept { @@ -76,7 +67,7 @@ class build_keys_fn { } private: - Hasher _hash; + hasher _hash; }; /** @@ -92,26 +83,19 @@ struct output_fn { }; } // namespace -template -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - bool has_nulls, - cudf::null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _has_nulls{has_nulls}, +distinct_hash_join::distinct_hash_join(cudf::table_view const& build, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _has_nested_columns{cudf::has_nested_columns(build)}, _nulls_equal{compare_nulls}, _build{build}, - _probe{probe}, _preprocessed_build{ cudf::experimental::row::equality::preprocessed_table::create(_build, stream)}, - _preprocessed_probe{ - cudf::experimental::row::equality::preprocessed_table::create(_probe, stream)}, _hash_table{build.num_rows(), CUCO_DESIRED_LOAD_FACTOR, cuco::empty_key{cuco::pair{std::numeric_limits::max(), rhs_index_type{JoinNoneValue}}}, - prepare_device_equal( - 
_preprocessed_build, _preprocessed_probe, has_nulls, compare_nulls), + always_not_equal{}, {}, cuco::thread_scope_device, cuco_storage_type{}, @@ -124,10 +108,10 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, if (this->_build.num_rows() == 0) { return; } auto const row_hasher = experimental::row::hash::row_hasher{this->_preprocessed_build}; - auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); + auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_hasher}); + auto const iter = + cudf::detail::make_counting_transform_iterator(0, build_keys_fn{d_hasher}); size_type const build_table_num_rows{build.num_rows()}; if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(this->_build))) { @@ -146,15 +130,15 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, } } -template std::pair>, std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::inner_join"}; - size_type const probe_table_num_rows{this->_probe.num_rows()}; + size_type const probe_table_num_rows{probe.num_rows()}; // If output size is zero, return immediately if (probe_table_num_rows == 0) { @@ -162,25 +146,62 @@ distinct_hash_join::inner_join(rmm::cuda_stream_view stream, std::make_unique>(0, stream, mr)); } + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_probe, _preprocessed_build); + auto build_indices = std::make_unique>(probe_table_num_rows, stream, mr); auto probe_indices = 
std::make_unique>(probe_table_num_rows, stream, mr); - auto const probe_row_hasher = - cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto const probe_row_hasher = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_probe_hasher}); auto found_indices = rmm::device_uvector(probe_table_num_rows, stream); auto const found_begin = thrust::make_transform_output_iterator(found_indices.begin(), output_fn{}); - // TODO conditional find for nulls once `cuco::static_set::find_if` is added - // If `idx` is within the range `[0, probe_table_num_rows)` and `found_indices[idx]` is not equal - // to `JoinNoneValue`, then `idx` has a match in the hash set. - this->_hash_table.find_async(iter, iter + probe_table_num_rows, found_begin, stream.value()); + auto const comparator_helper = [&](auto device_comparator) { + // If `idx` is within the range `[0, probe_table_num_rows)` and `found_indices[idx]` is not + // equal to `JoinNoneValue`, then `idx` has a match in the hash set. 
+ if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(probe))) { + this->_hash_table.find_async(iter, + iter + probe_table_num_rows, + comparator_adapter{device_comparator}, + hasher{}, + found_begin, + stream.value()); + } else { + auto stencil = thrust::counting_iterator{0}; + auto const row_bitmask = + cudf::detail::bitmask_and(probe, stream, cudf::get_current_device_resource_ref()).first; + auto const pred = + cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; + + this->_hash_table.find_if_async(iter, + iter + probe_table_num_rows, + stencil, + pred, + comparator_adapter{device_comparator}, + hasher{}, + found_begin, + stream.value()); + } + }; + + if (_has_nested_columns) { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } auto const tuple_iter = cudf::detail::make_counting_transform_iterator( 0, @@ -203,16 +224,17 @@ distinct_hash_join::inner_join(rmm::cuda_stream_view stream, build_indices->resize(actual_size, stream); probe_indices->resize(actual_size, stream); - return {std::move(build_indices), std::move(probe_indices)}; + return {std::move(probe_indices), std::move(build_indices)}; } -template -std::unique_ptr> distinct_hash_join::left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const +std::unique_ptr> distinct_hash_join::left_join( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::left_join"}; - size_type const probe_table_num_rows{this->_probe.num_rows()}; + size_type const probe_table_num_rows{probe.num_rows()}; // If output size is zero, return empty if (probe_table_num_rows == 0) { @@ -227,80 +249,82 @@ std::unique_ptr> 
distinct_hash_join::l thrust::fill( rmm::exec_policy_nosync(stream), build_indices->begin(), build_indices->end(), JoinNoneValue); } else { - auto const probe_row_hasher = - cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_probe, _preprocessed_build); + + auto const probe_row_hasher = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_probe_hasher}); auto const output_begin = thrust::make_transform_output_iterator(build_indices->begin(), output_fn{}); - // TODO conditional find for nulls once `cuco::static_set::find_if` is added - this->_hash_table.find_async(iter, iter + probe_table_num_rows, output_begin, stream.value()); + auto const comparator_helper = [&](auto device_comparator) { + if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(probe))) { + this->_hash_table.find_async(iter, + iter + probe_table_num_rows, + comparator_adapter{device_comparator}, + hasher{}, + output_begin, + stream.value()); + } else { + auto stencil = thrust::counting_iterator{0}; + auto const row_bitmask = + cudf::detail::bitmask_and(probe, stream, cudf::get_current_device_resource_ref()).first; + auto const pred = + cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; + + this->_hash_table.find_if_async(iter, + iter + probe_table_num_rows, + stencil, + pred, + comparator_adapter{device_comparator}, + hasher{}, + output_begin, + 
stream.value()); + } + }; + + if (_has_nested_columns) { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } } return build_indices; } } // namespace detail -template <> -distinct_hash_join::~distinct_hash_join() = default; - -template <> -distinct_hash_join::~distinct_hash_join() = default; - -template <> -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _impl{std::make_unique( - build, probe, has_nulls == nullable_join::YES, compare_nulls, stream)} -{ -} - -template <> -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _impl{std::make_unique( - build, probe, has_nulls == nullable_join::YES, compare_nulls, stream)} -{ -} +distinct_hash_join::~distinct_hash_join() = default; -template <> -std::pair>, - std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::distinct_hash_join(cudf::table_view const& build, + null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _impl{std::make_unique(build, compare_nulls, stream)} { - return _impl->inner_join(stream, mr); } -template <> std::pair>, std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const -{ - return _impl->inner_join(stream, mr); -} - -template <> -std::unique_ptr> -distinct_hash_join::left_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::inner_join(cudf::table_view 
const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { - return _impl->left_join(stream, mr); + return _impl->inner_join(probe, stream, mr); } -template <> -std::unique_ptr> distinct_hash_join::left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const +std::unique_ptr> distinct_hash_join::left_join( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { - return _impl->left_join(stream, mr); + return _impl->left_join(probe, stream, mr); } } // namespace cudf diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 9070efa38fe..e1ec8cda3ac 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -53,7 +53,7 @@ struct DistinctJoinTest : public cudf::test::BaseFixture { cudf::table_view const& expected_table, cudf::out_of_bounds_policy oob_policy = cudf::out_of_bounds_policy::DONT_CHECK) { - auto const& [build_join_indices, probe_join_indices] = result; + auto const& [probe_join_indices, build_join_indices] = result; auto build_indices_span = cudf::device_span{*build_join_indices}; auto probe_indices_span = cudf::device_span{*probe_join_indices}; @@ -89,10 +89,9 @@ TEST_F(DistinctJoinTest, IntegerInnerJoin) auto build_table = cudf::table_view{{build->view()}}; auto probe_table = cudf::table_view{{probe->view()}}; - auto distinct_join = cudf::distinct_hash_join{ - build_table, probe_table, cudf::nullable_join::NO}; + auto distinct_join = cudf::distinct_hash_join{build_table}; - auto result = distinct_join.inner_join(); + auto result = distinct_join.inner_join(probe_table); auto constexpr gold_size = size / 2; auto gold = cudf::sequence(gold_size, init, cudf::numeric_scalar{2}); @@ -120,8 +119,8 @@ TEST_F(DistinctJoinTest, InnerJoinNoNulls) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{1, 2}}; strcol_wrapper col_gold_1({"s0", "s0"}); @@ -162,8 +161,8 @@ TEST_F(DistinctJoinTest, InnerJoinWithNulls) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {true, true}); @@ -229,8 +228,8 @@ TEST_F(DistinctJoinTest, InnerJoinWithStructsAndNulls) Table 
probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {true, true}); @@ -284,8 +283,8 @@ TEST_F(DistinctJoinTest, EmptyBuildTableInnerJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); this->compare_to_reference(build.view(), probe.view(), result, build.view()); } @@ -307,9 +306,9 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -332,8 +331,8 @@ TEST_F(DistinctJoinTest, EmptyProbeTableInnerJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); this->compare_to_reference(build.view(), probe.view(), result, probe.view()); } @@ -355,9 +354,9 @@ TEST_F(DistinctJoinTest, 
EmptyProbeTableLeftJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -391,9 +390,9 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) cols_gold.push_back(col_gold_3.release()); Table gold(std::move(cols_gold)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -416,9 +415,9 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) Table probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {true, true, false, true, true}); @@ -461,9 +460,9 @@ 
TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) Table probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; auto col0_gold_names_col = strcol_wrapper{ "Samuel Vimes", "Detritus", "Carrot Ironfoundersson", "Samuel Vimes", "Angua von Überwald"}; diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 1f8b1ea207d..ed35f35794d 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2901,16 +2901,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftDistinctJoinGatherMap j_right_keys, compare_nulls_equal, [](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) { - auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) - ? 
cudf::nullable_join::YES - : cudf::nullable_join::NO; - if (cudf::has_nested_columns(right)) { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - return hash.left_join(); - } else { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - return hash.left_join(); - } + cudf::distinct_hash_join hash(right, nulleq); + return hash.left_join(left); }); } @@ -3119,22 +3111,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerDistinctJoinGatherMa j_right_keys, compare_nulls_equal, [](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) { - auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - std::pair>, - std::unique_ptr>> - maps; - if (cudf::has_nested_columns(right)) { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - maps = hash.inner_join(); - } else { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - maps = hash.inner_join(); - } - // Unique join returns {right map, left map} but all the other joins - // return {left map, right map}. Swap here to make it consistent. - return std::make_pair(std::move(maps.second), std::move(maps.first)); + cudf::distinct_hash_join hash(right, nulleq); + return hash.inner_join(left); }); }