11/*
2- * Copyright (c) 2024, NVIDIA CORPORATION.
2+ * Copyright (c) 2023- 2024, NVIDIA CORPORATION.
33 *
44 * Licensed under the Apache License, Version 2.0 (the "License");
55 * you may not use this file except in compliance with the License.
1313 * See the License for the specific language governing permissions and
1414 * limitations under the License.
1515 */
16-
1716#include < cudf/column/column_factories.hpp>
1817#include < cudf/detail/nvtx/ranges.hpp>
18+ #include < cudf/detail/utilities/algorithm.cuh>
1919#include < cudf/hashing/detail/hashing.hpp>
2020#include < cudf/hashing/detail/xxhash_32.cuh>
21- #include < cudf/table/experimental/row_operators.cuh>
2221#include < cudf/table/table_device_view.cuh>
2322#include < cudf/utilities/memory_resource.hpp>
23+ #include < cudf/utilities/span.hpp>
2424
2525#include < rmm/cuda_stream_view.hpp>
2626#include < rmm/exec_policy.hpp>
2727
28+ #include < cuda/std/limits>
2829#include < thrust/tabulate.h>
2930
30- namespace cudf ::hashing {
31+ namespace cudf {
32+ namespace hashing {
3133namespace detail {
3234
35+ namespace {
36+
37+ using hash_value_type = uint32_t ;
38+
/**
 * @brief Functor producing one 32-bit XXHash value for a row of a table.
 *
 * The columns of the table are passed through `cudf::detail::accumulate`,
 * starting from the seed given at construction; the running value is handed
 * to the per-element hasher as the seed for the next column.
 *
 * @tparam Nullate Type of the flag describing whether to check for nulls.
 */
template <typename Nullate>
class device_row_hasher {
 public:
  /**
   * @brief Per-element hasher invoked through `cudf::type_dispatcher`.
   */
  class element_hasher_adapter {
   public:
    /**
     * @brief Hashes one element of a column whose type has an element accessor.
     *
     * @param col Column holding the element
     * @param row_index Row of the element within `col`
     * @param check_nulls Whether the null state of the row must be examined
     * @param seed Seed handed to `XXHash_32<T>`
     * @return The maximum `hash_value_type` value for a null row (when nulls are
     *         checked), otherwise the seeded hash of the element
     */
    template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
    __device__ hash_value_type operator()(column_device_view const& col,
                                          size_type const row_index,
                                          Nullate const check_nulls,
                                          hash_value_type const seed) const noexcept
    {
      // Short-circuit keeps is_null() from being evaluated when null checking is off.
      bool const row_is_null = check_nulls && col.is_null(row_index);
      if (row_is_null) { return cuda::std::numeric_limits<hash_value_type>::max(); }

      return XXHash_32<T>{seed}(col.element<T>(row_index));
    }

    /**
     * @brief Overload selected for types without an element accessor; never expected to run.
     */
    template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
    __device__ hash_value_type operator()(column_device_view const&,
                                          size_type const,
                                          Nullate const,
                                          hash_value_type const) const noexcept
    {
      CUDF_UNREACHABLE(" Unsupported type for XXHash_32" );
    }
  };

  /**
   * @brief Stores the null-check flag, the table view and the initial seed.
   *
   * @param nulls Whether rows must be checked for nulls
   * @param t Device view of the table whose rows are hashed
   * @param seed Initial seed used for the first column
   */
  device_row_hasher(Nullate nulls, table_device_view const& t, hash_value_type seed)
    : _check_nulls(nulls), _table(t), _seed(seed)
  {
  }

  /**
   * @brief Computes the hash value of the row at `row_index`.
   *
   * @param row_index Index of the row to hash
   * @return Result of accumulating the per-column hashes, starting from the stored seed
   */
  __device__ auto operator()(size_type row_index) const noexcept
  {
    // Binary step: hash the current column's element using the running value as seed.
    auto const hash_next_column = [row_index, check = _check_nulls] __device__(auto running,
                                                                               auto col) {
      return cudf::type_dispatcher(
        col.type(), element_hasher_adapter{}, col, row_index, check, running);
    };

    return cudf::detail::accumulate(_table.begin(), _table.end(), _seed, hash_next_column);
  }

  Nullate const _check_nulls;       ///< Whether to test rows for null before hashing
  table_device_view const _table;   ///< Table whose rows are hashed
  hash_value_type const _seed;      ///< Seed used for the first column of every row
};
96+
97+ } // namespace
98+
3399std::unique_ptr<column> xxhash_32 (table_view const & input,
34100 uint32_t seed,
35101 rmm::cuda_stream_view stream,
@@ -45,14 +111,14 @@ std::unique_ptr<column> xxhash_32(table_view const& input,
45111 if (input.num_columns () == 0 || input.num_rows () == 0 ) { return output; }
46112
47113 bool const nullable = has_nulls (input);
48- auto const row_hasher = cudf::experimental::row::hash::row_hasher (input, stream);
114+ auto const input_view = table_device_view::create (input, stream);
49115 auto output_view = output->mutable_view ();
50116
51117 // Compute the hash value for each row
52118 thrust::tabulate (rmm::exec_policy (stream),
53119 output_view.begin <hash_value_type>(),
54120 output_view.end <hash_value_type>(),
55- row_hasher. device_hasher <XXHash_32> (nullable, seed));
121+ device_row_hasher (nullable, *input_view , seed));
56122
57123 return output;
58124}
@@ -68,4 +134,5 @@ std::unique_ptr<column> xxhash_32(table_view const& input,
68134 return detail::xxhash_32 (input, seed, stream, mr);
69135}
70136
71- } // namespace cudf::hashing
137+ } // namespace hashing
138+ } // namespace cudf
0 commit comments