1
1
/*
2
- * Copyright (c) 2024, NVIDIA CORPORATION.
2
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3
3
*
4
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
5
* you may not use this file except in compliance with the License.
13
13
* See the License for the specific language governing permissions and
14
14
* limitations under the License.
15
15
*/
16
-
17
16
#include < cudf/column/column_factories.hpp>
18
17
#include < cudf/detail/nvtx/ranges.hpp>
18
+ #include < cudf/detail/utilities/algorithm.cuh>
19
19
#include < cudf/hashing/detail/hashing.hpp>
20
20
#include < cudf/hashing/detail/xxhash_32.cuh>
21
- #include < cudf/table/experimental/row_operators.cuh>
22
21
#include < cudf/table/table_device_view.cuh>
23
22
#include < cudf/utilities/memory_resource.hpp>
23
+ #include < cudf/utilities/span.hpp>
24
24
25
25
#include < rmm/cuda_stream_view.hpp>
26
26
#include < rmm/exec_policy.hpp>
27
27
28
+ #include < cuda/std/limits>
28
29
#include < thrust/tabulate.h>
29
30
30
- namespace cudf ::hashing {
31
+ namespace cudf {
32
+ namespace hashing {
31
33
namespace detail {
32
34
35
+ namespace {
36
+
37
+ using hash_value_type = uint32_t ;
38
+
39
/**
 * @brief Computes the hash value of a row in the given table.
 *
 * The row hash is produced by chaining the per-column element hashes: the first
 * column is hashed with the seed given at construction, and each subsequent column
 * is hashed using the result of the previous column as its seed.
 *
 * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
 */
template <typename Nullate>
class device_row_hasher {
 public:
  /**
   * @brief Computes the hash value of an element in the given column.
   *
   * Invoked through `cudf::type_dispatcher`; the overload is selected by whether
   * `column_device_view` provides an element accessor for the dispatched type `T`.
   */
  class element_hasher_adapter {
   public:
    /**
     * @brief Hashes the element at `row_index` of `col` with `XXHash_32<T>`.
     *
     * @param col Column holding the element
     * @param row_index Index of the element within `col`
     * @param check_nulls Whether the element's validity must be inspected
     * @param seed Seed handed to the `XXHash_32<T>` hasher
     * @return The maximum `hash_value_type` value when null checking is enabled and the
     * element is null (the seed is not used in that case), otherwise the hash of the element
     */
    template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
    __device__ hash_value_type operator()(column_device_view const& col,
                                          size_type const row_index,
                                          Nullate const check_nulls,
                                          hash_value_type const seed) const noexcept
    {
      // The validity lookup is only performed when null checking is requested.
      if (check_nulls && col.is_null(row_index)) {
        return cuda::std::numeric_limits<hash_value_type>::max();
      }
      return XXHash_32<T>{seed}(col.element<T>(row_index));
    }

    /**
     * @brief Overload chosen for types without an element accessor; never expected to run.
     */
    template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
    __device__ hash_value_type operator()(column_device_view const&,
                                          size_type const,
                                          Nullate const,
                                          hash_value_type const) const noexcept
    {
      CUDF_UNREACHABLE(" Unsupported type for XXHash_32");
    }
  };

  /**
   * @brief Stores the null-checking policy, the table to hash and the initial seed.
   *
   * @param nulls Whether elements must be checked for nulls
   * @param t Device view of the table whose rows are hashed
   * @param seed Seed used for the first column of every row
   */
  device_row_hasher(Nullate nulls, table_device_view const& t, hash_value_type seed)
    : _check_nulls(nulls), _table(t), _seed(seed)
  {
  }

  /**
   * @brief Computes the hash of the row at `row_index`.
   *
   * @param row_index Index of the row to hash
   * @return The chained hash over all columns of the row; the seed itself when the
   * table has no columns
   */
  __device__ auto operator()(size_type row_index) const noexcept
  {
    hash_value_type row_hash = _seed;
    auto const columns_end   = _table.end();
    // Walk the columns in order, feeding each result in as the next column's seed.
    for (auto col_it = _table.begin(); col_it != columns_end; ++col_it) {
      row_hash = cudf::type_dispatcher(
        col_it->type(), element_hasher_adapter{}, *col_it, row_index, _check_nulls, row_hash);
    }
    return row_hash;
  }

  Nullate const _check_nulls;
  table_device_view const _table;
  hash_value_type const _seed;
};
96
+
97
+ } // namespace
98
+
33
99
std::unique_ptr<column> xxhash_32 (table_view const & input,
34
100
uint32_t seed,
35
101
rmm::cuda_stream_view stream,
@@ -45,14 +111,14 @@ std::unique_ptr<column> xxhash_32(table_view const& input,
45
111
if (input.num_columns () == 0 || input.num_rows () == 0 ) { return output; }
46
112
47
113
bool const nullable = has_nulls (input);
48
- auto const row_hasher = cudf::experimental::row::hash::row_hasher (input, stream);
114
+ auto const input_view = table_device_view::create (input, stream);
49
115
auto output_view = output->mutable_view ();
50
116
51
117
// Compute the hash value for each row
52
118
thrust::tabulate (rmm::exec_policy (stream),
53
119
output_view.begin <hash_value_type>(),
54
120
output_view.end <hash_value_type>(),
55
- row_hasher. device_hasher <XXHash_32> (nullable, seed));
121
+ device_row_hasher (nullable, *input_view , seed));
56
122
57
123
return output;
58
124
}
@@ -68,4 +134,5 @@ std::unique_ptr<column> xxhash_32(table_view const& input,
68
134
return detail::xxhash_32 (input, seed, stream, mr);
69
135
}
70
136
71
- } // namespace cudf::hashing
137
+ } // namespace hashing
138
+ } // namespace cudf
0 commit comments