From c093136b82a79570c71a7698c026cc76eaeaaeaf Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Sun, 17 Sep 2023 14:06:09 -0700 Subject: [PATCH 1/4] adds stream parameter to external dict APIs Signed-off-by: Suraj Aralihalli --- cpp/include/cudf/dictionary/encode.hpp | 6 +++++- cpp/include/cudf/dictionary/search.hpp | 6 ++++-- cpp/include/cudf/dictionary/update_keys.hpp | 16 +++++++++++++--- cpp/src/dictionary/add_keys.cu | 3 ++- cpp/src/dictionary/decode.cu | 5 +++-- cpp/src/dictionary/encode.cu | 5 +++-- cpp/src/dictionary/remove_keys.cu | 6 ++++-- cpp/src/dictionary/search.cu | 5 +++-- cpp/src/dictionary/set_keys.cu | 9 ++++++--- 9 files changed, 43 insertions(+), 18 deletions(-) diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp index fb13eabe11a..959b785bf87 100644 --- a/cpp/include/cudf/dictionary/encode.hpp +++ b/cpp/include/cudf/dictionary/encode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,12 +53,14 @@ namespace dictionary { * * @param column The column to dictionary encode * @param indices_type The integer type to use for the indices + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Returns a dictionary column */ std::unique_ptr encode( column_view const& column, data_type indices_type = data_type{type_id::UINT32}, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -72,11 +74,13 @@ std::unique_ptr encode( * @endcode * * @param dictionary_column Existing dictionary column + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New column with type matching the dictionary_column's keys */ std::unique_ptr decode( dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/dictionary/search.hpp b/cpp/include/cudf/dictionary/search.hpp index ed7a9c84693..1b72cf42acd 100644 --- a/cpp/include/cudf/dictionary/search.hpp +++ b/cpp/include/cudf/dictionary/search.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,12 +37,14 @@ namespace dictionary { * * @param dictionary The dictionary to search for the key. * @param key The value to search for in the dictionary keyset. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Numeric scalar index value of the key within the dictionary + * @return Numeric scalar index value of the key within the dictionary. */ std::unique_ptr get_index( dictionary_column_view const& dictionary, scalar const& key, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp index 2fcfb5e1f7c..81728e1ff73 100644 --- a/cpp/include/cudf/dictionary/update_keys.hpp +++ b/cpp/include/cudf/dictionary/update_keys.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,13 +51,15 @@ namespace dictionary { * @throw cudf_logic_error if the new_keys contain nulls. * * @param dictionary_column Existing dictionary column. - * @param new_keys New keys to incorporate into the dictionary_column + * @param new_keys New keys to incorporate into the dictionary_column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr add_keys( dictionary_column_view const& dictionary_column, column_view const& new_keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -81,13 +83,15 @@ std::unique_ptr add_keys( * @throw cudf_logic_error if the keys_to_remove contain nulls. * * @param dictionary_column Existing dictionary column. - * @param keys_to_remove The keys to remove from the dictionary_column + * @param keys_to_remove The keys to remove from the dictionary_column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr remove_keys( dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -103,11 +107,13 @@ std::unique_ptr remove_keys( * @endcode * * @param dictionary_column Existing dictionary column. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr remove_unused_keys( dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -134,12 +140,14 @@ std::unique_ptr remove_unused_keys( * * @param dictionary_column Existing dictionary column. * @param keys New keys to use for the output column. Must not contain nulls. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary column. */ std::unique_ptr set_keys( dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -149,11 +157,13 @@ std::unique_ptr set_keys( * The result is a vector of new dictionaries with a common set of keys. * * @param input Dictionary columns to match keys. + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New dictionary columns. */ std::vector> match_dictionaries( cudf::host_span input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index ab22c07e4d5..3973100aced 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -130,10 +130,11 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column std::unique_ptr add_keys(dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::add_keys(dictionary_column, keys, cudf::get_default_stream(), mr); + return detail::add_keys(dictionary_column, keys, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/decode.cu b/cpp/src/dictionary/decode.cu index 01411d06b62..fdf546b5875 100644 --- a/cpp/src/dictionary/decode.cu +++ b/cpp/src/dictionary/decode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,10 +65,11 @@ std::unique_ptr decode(dictionary_column_view const& source, } // namespace detail std::unique_ptr decode(dictionary_column_view const& source, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::decode(source, cudf::get_default_stream(), mr); + return detail::decode(source, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/encode.cu b/cpp/src/dictionary/encode.cu index fe8e777b694..c92b57f0cac 100644 --- a/cpp/src/dictionary/encode.cu +++ b/cpp/src/dictionary/encode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,10 +89,11 @@ data_type get_indices_type_for_size(size_type keys_size) std::unique_ptr encode(column_view const& input_column, data_type indices_type, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::encode(input_column, indices_type, cudf::get_default_stream(), mr); + return detail::encode(input_column, indices_type, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 9fe4a63373b..86b70f1119b 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -195,17 +195,19 @@ std::unique_ptr remove_unused_keys(dictionary_column_view const& diction std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_keys(dictionary_column, keys_to_remove, cudf::get_default_stream(), mr); + return detail::remove_keys(dictionary_column, keys_to_remove, stream, mr); } std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::remove_unused_keys(dictionary_column, cudf::get_default_stream(), mr); + return detail::remove_unused_keys(dictionary_column, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 8e97a387780..dbbafe2559b 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -176,10 +176,11 @@ std::unique_ptr get_insert_index(dictionary_column_view const& dictionar std::unique_ptr get_index(dictionary_column_view const& dictionary, scalar const& key, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::get_index(dictionary, key, cudf::get_default_stream(), mr); + return detail::get_index(dictionary, key, stream, mr); } } // namespace dictionary diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 36f5021d305..b49cf7850b1 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -241,17 +241,20 @@ std::pair>, std::vector> match_d std::unique_ptr set_keys(dictionary_column_view const& dictionary_column, column_view const& keys, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::set_keys(dictionary_column, keys, cudf::get_default_stream(), mr); + return detail::set_keys(dictionary_column, keys, stream, mr); } std::vector> match_dictionaries( - cudf::host_span input, rmm::mr::device_memory_resource* mr) + cudf::host_span input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::match_dictionaries(input, cudf::get_default_stream(), mr); + return detail::match_dictionaries(input, stream, mr); } } // namespace dictionary From 3919675668fcdd41c4ebf91508b92ee7d77ca3a8 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Sun, 17 Sep 2023 19:40:59 -0700 Subject: [PATCH 2/4] adds test cases Signed-off-by: Suraj Aralihalli --- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/dictionary_test.cpp | 105 ++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 cpp/tests/streams/dictionary_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a69dc9bf2f8..5a92299aecd 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -627,6 +627,7 @@ ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE t ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/streams/dictionary_test.cpp b/cpp/tests/streams/dictionary_test.cpp new file mode 100644 index 00000000000..e382074ab30 --- /dev/null +++ b/cpp/tests/streams/dictionary_test.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +class DictionaryTest : public cudf::test::BaseFixture {}; + +TEST_F(DictionaryTest, Encode) +{ + cudf::test::fixed_width_column_wrapper col({1, 2, 3, 4, 5}); + cudf::data_type int32_type(cudf::type_id::UINT32); + cudf::column_view col_view = col; + cudf::dictionary::encode(col_view, int32_type, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, Decode) +{ + // keys = {0, 2, 6}, indices = {0, 1, 1, 2, 2} + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::dictionary::decode(dict_col_view, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, GetIndex) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::numeric_scalar key_scalar(2); + cudf::dictionary::get_index(dict_col_view, key_scalar, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, AddKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper new_keys_col({8, 9}); + cudf::dictionary::add_keys(dict_col_view, new_keys_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, RemoveKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper keys_to_remove_col({2}); + cudf::dictionary::remove_keys( + dict_col_view, keys_to_remove_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, RemoveUnsedKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::dictionary::remove_unused_keys(dict_col_view, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, SetKeys) +{ + std::vector elements{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); + cudf::dictionary_column_view dict_col_view = dict_col; + cudf::test::fixed_width_column_wrapper keys_col({2, 6}); + cudf::dictionary::set_keys(dict_col_view, keys_col, cudf::test::get_default_stream()); +} + +TEST_F(DictionaryTest, MatchDictionaries) +{ + std::vector elements_a{0, 2, 2, 6, 6}; + cudf::test::dictionary_column_wrapper dict_col_a(elements_a.begin(), elements_a.end()); + cudf::dictionary_column_view dict_col_view_a = dict_col_a; + + std::vector elements_b{1, 3, 4, 5, 5}; + cudf::test::dictionary_column_wrapper dict_col_b(elements_b.begin(), elements_b.end()); + cudf::dictionary_column_view dict_col_view_b = dict_col_b; + + std::vector dicts = {dict_col_view_a, dict_col_view_b}; + + cudf::test::fixed_width_column_wrapper keys_col({2, 6}); + cudf::dictionary::match_dictionaries(dicts, cudf::test::get_default_stream()); +} From 4dd76af81debe423faa3e23329e9ee3dd7916ea9 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Thu, 21 Sep 2023 22:07:20 -0700 Subject: [PATCH 3/4] fix unexpected stream found bug Signed-off-by: Suraj Aralihalli --- cpp/include/cudf_test/column_wrapper.hpp | 18 +++++++++++++----- cpp/src/dictionary/search.cu | 6 ++---- cpp/tests/streams/dictionary_test.cpp | 2 +- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index cc8cac35ef4..c0932b81dc3 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -944,8 +944,10 @@ class dictionary_column_wrapper : public detail::column_wrapper { template dictionary_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{} { - wrapped = cudf::dictionary::encode( - fixed_width_column_wrapper(begin, end)); + wrapped = + cudf::dictionary::encode(fixed_width_column_wrapper(begin, end), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -978,7 +980,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { : column_wrapper{} { wrapped = cudf::dictionary::encode( - fixed_width_column_wrapper(begin, end, v)); + fixed_width_column_wrapper(begin, end, v), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -1134,7 +1138,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { template dictionary_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{} { - wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end)); + wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** @@ -1169,7 +1175,9 @@ class dictionary_column_wrapper : public detail::column_wrapper { dictionary_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v) : column_wrapper{} { - wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v)); + wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v), + cudf::data_type{type_id::UINT32}, + cudf::test::get_default_stream()); } /** diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index dbbafe2559b..e35aded1984 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -79,10 +79,8 @@ struct find_index_fn { using ScalarType = cudf::scalar_type_t; auto find_key = static_cast(key).value(stream); auto keys_view = column_device_view::create(input.keys(), stream); - auto iter = thrust::equal_range(rmm::exec_policy(cudf::get_default_stream()), - keys_view->begin(), - keys_view->end(), - find_key); + auto iter = thrust::equal_range( + rmm::exec_policy(stream), keys_view->begin(), keys_view->end(), find_key); return type_dispatcher(input.indices().type(), dispatch_scalar_index{}, thrust::distance(keys_view->begin(), iter.first), diff --git a/cpp/tests/streams/dictionary_test.cpp b/cpp/tests/streams/dictionary_test.cpp index e382074ab30..f48e64c078e 100644 --- a/cpp/tests/streams/dictionary_test.cpp +++ b/cpp/tests/streams/dictionary_test.cpp @@ -48,7 +48,7 @@ TEST_F(DictionaryTest, GetIndex) std::vector elements{0, 2, 2, 6, 6}; cudf::test::dictionary_column_wrapper dict_col(elements.begin(), elements.end()); cudf::dictionary_column_view dict_col_view = dict_col; - cudf::numeric_scalar key_scalar(2); + cudf::numeric_scalar key_scalar(2, true, cudf::test::get_default_stream()); cudf::dictionary::get_index(dict_col_view, key_scalar, cudf::test::get_default_stream()); } From 73c6aecf4ad3b57f985b4f3f4aa143d53a24f4b5 Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Fri, 22 Sep 2023 14:39:47 -0700 Subject: [PATCH 4/4] remove blank line accedentally added during merge conflict Signed-off-by: Suraj Aralihalli --- cpp/tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 59b8d3049d2..e245bb449a0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -634,7 +634,6 @@ ConfigureTest( testing ) - # ################################################################################################## # Install tests #################################################################################### # ##################################################################################################