Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stream parameter to external dict APIs #14115

Merged
6 changes: 5 additions & 1 deletion cpp/include/cudf/dictionary/encode.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,12 +53,14 @@ namespace dictionary {
*
* @param column The column to dictionary encode
* @param indices_type The integer type to use for the indices
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return A dictionary column
*/
std::unique_ptr<column> encode(
column_view const& column,
data_type indices_type = data_type{type_id::UINT32},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -72,11 +74,13 @@ std::unique_ptr<column> encode(
* @endcode
*
* @param dictionary_column Existing dictionary column
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with type matching the dictionary_column's keys
*/
std::unique_ptr<column> decode(
dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
6 changes: 4 additions & 2 deletions cpp/include/cudf/dictionary/search.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,12 +37,14 @@ namespace dictionary {
*
* @param dictionary The dictionary to search for the key.
* @param key The value to search for in the dictionary keyset.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned scalar's device memory.
* @return Numeric scalar index value of the key within the dictionary
* @return Numeric scalar index value of the key within the dictionary.
*/
std::unique_ptr<scalar> get_index(
dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
16 changes: 13 additions & 3 deletions cpp/include/cudf/dictionary/update_keys.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -51,13 +51,15 @@ namespace dictionary {
* @throw cudf_logic_error if the new_keys contain nulls.
*
* @param dictionary_column Existing dictionary column.
* @param new_keys New keys to incorporate into the dictionary_column
* @param new_keys New keys to incorporate into the dictionary_column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> add_keys(
dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -81,13 +83,15 @@ std::unique_ptr<column> add_keys(
* @throw cudf_logic_error if the keys_to_remove contain nulls.
*
* @param dictionary_column Existing dictionary column.
* @param keys_to_remove The keys to remove from the dictionary_column
* @param keys_to_remove The keys to remove from the dictionary_column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> remove_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys_to_remove,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -103,11 +107,13 @@ std::unique_ptr<column> remove_keys(
* @endcode
*
* @param dictionary_column Existing dictionary column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> remove_unused_keys(
dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -134,12 +140,14 @@ std::unique_ptr<column> remove_unused_keys(
*
* @param dictionary_column Existing dictionary column.
* @param keys New keys to use for the output column. Must not contain nulls.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> set_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -149,11 +157,13 @@ std::unique_ptr<column> set_keys(
* The result is a vector of new dictionaries with a common set of keys.
*
* @param input Dictionary columns whose keys are to be matched.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned columns' device memory.
* @return New dictionary columns.
*/
std::vector<std::unique_ptr<column>> match_dictionaries(
cudf::host_span<dictionary_column_view const> input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
18 changes: 13 additions & 5 deletions cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -944,8 +944,10 @@ class dictionary_column_wrapper : public detail::column_wrapper {
template <typename InputIterator>
dictionary_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{}
{
wrapped = cudf::dictionary::encode(
fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end));
wrapped =
cudf::dictionary::encode(fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down Expand Up @@ -978,7 +980,9 @@ class dictionary_column_wrapper : public detail::column_wrapper {
: column_wrapper{}
{
wrapped = cudf::dictionary::encode(
fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end, v));
fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end, v),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down Expand Up @@ -1134,7 +1138,9 @@ class dictionary_column_wrapper<std::string> : public detail::column_wrapper {
template <typename StringsIterator>
dictionary_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
{
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end));
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down Expand Up @@ -1169,7 +1175,9 @@ class dictionary_column_wrapper<std::string> : public detail::column_wrapper {
dictionary_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
: column_wrapper{}
{
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v));
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/dictionary/add_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,11 @@ std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column

std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::add_keys(dictionary_column, keys, cudf::get_default_stream(), mr);
return detail::add_keys(dictionary_column, keys, stream, mr);
}

} // namespace dictionary
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/decode.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -65,10 +65,11 @@ std::unique_ptr<column> decode(dictionary_column_view const& source,
} // namespace detail

std::unique_ptr<column> decode(dictionary_column_view const& source,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::decode(source, cudf::get_default_stream(), mr);
return detail::decode(source, stream, mr);
}

} // namespace dictionary
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/encode.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -89,10 +89,11 @@ data_type get_indices_type_for_size(size_type keys_size)

std::unique_ptr<column> encode(column_view const& input_column,
data_type indices_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::encode(input_column, indices_type, cudf::get_default_stream(), mr);
return detail::encode(input_column, indices_type, stream, mr);
}

} // namespace dictionary
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/dictionary/remove_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,19 @@ std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& diction

std::unique_ptr<column> remove_keys(dictionary_column_view const& dictionary_column,
column_view const& keys_to_remove,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::remove_keys(dictionary_column, keys_to_remove, cudf::get_default_stream(), mr);
return detail::remove_keys(dictionary_column, keys_to_remove, stream, mr);
}

std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::remove_unused_keys(dictionary_column, cudf::get_default_stream(), mr);
return detail::remove_unused_keys(dictionary_column, stream, mr);
}

} // namespace dictionary
Expand Down
11 changes: 5 additions & 6 deletions cpp/src/dictionary/search.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -79,10 +79,8 @@ struct find_index_fn {
using ScalarType = cudf::scalar_type_t<Element>;
auto find_key = static_cast<ScalarType const&>(key).value(stream);
auto keys_view = column_device_view::create(input.keys(), stream);
auto iter = thrust::equal_range(rmm::exec_policy(cudf::get_default_stream()),
keys_view->begin<Element>(),
keys_view->end<Element>(),
find_key);
auto iter = thrust::equal_range(
rmm::exec_policy(stream), keys_view->begin<Element>(), keys_view->end<Element>(), find_key);
return type_dispatcher(input.indices().type(),
dispatch_scalar_index{},
thrust::distance(keys_view->begin<Element>(), iter.first),
Expand Down Expand Up @@ -176,10 +174,11 @@ std::unique_ptr<scalar> get_insert_index(dictionary_column_view const& dictionar

std::unique_ptr<scalar> get_index(dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::get_index(dictionary, key, cudf::get_default_stream(), mr);
return detail::get_index(dictionary, key, stream, mr);
}

} // namespace dictionary
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/dictionary/set_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -241,17 +241,20 @@ std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_d

std::unique_ptr<column> set_keys(dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::set_keys(dictionary_column, keys, cudf::get_default_stream(), mr);
return detail::set_keys(dictionary_column, keys, stream, mr);
}

std::vector<std::unique_ptr<column>> match_dictionaries(
cudf::host_span<dictionary_column_view const> input, rmm::mr::device_memory_resource* mr)
cudf::host_span<dictionary_column_view const> input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::match_dictionaries(input, cudf::get_default_stream(), mr);
return detail::match_dictionaries(input, stream, mr);
}

} // namespace dictionary
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing)
ConfigureTest(
STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE
testing
Expand Down
Loading