Skip to content

Commit

Permalink
Expose stream parameter in public strings APIs (#14260)
Browse files Browse the repository at this point in the history
Add stream parameter to public APIs:

- `cudf::strings::strip()`
- `cudf::strings::slice_strings()`
- `cudf::strings::pad()`
- `cudf::strings::zfill()`
- `cudf::strings::wrap()`

Also cleaned up some of the doxygen comments and added stream-tests.

Reference #13744

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Nghia Truong (https://github.com/ttnghia)

URL: #14260
  • Loading branch information
davidwendt authored Oct 10, 2023
1 parent 053da82 commit 5039d04
Show file tree
Hide file tree
Showing 10 changed files with 125 additions and 38 deletions.
14 changes: 9 additions & 5 deletions cpp/include/cudf/strings/padding.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -51,6 +51,7 @@ namespace strings {
* Default is pad right (left justify)
* @param fill_char Single UTF-8 character to use for padding;
* Default is the space character
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with padded strings
*/
Expand All @@ -59,6 +60,7 @@ std::unique_ptr<column> pad(
size_type width,
side_type side = side_type::RIGHT,
std::string_view fill_char = " ",
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -79,14 +81,16 @@ std::unique_ptr<column> pad(
* r is now ['001234','-09876','+00.34','-342567', '0002+2']
* @endcode
*
* @param input Strings instance for this operation.
* @param width The minimum number of characters for each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of strings.
* @param input Strings instance for this operation
* @param width The minimum number of characters for each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column of strings
*/
std::unique_ptr<column> zfill(
strings_column_view const& input,
size_type width,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
30 changes: 17 additions & 13 deletions cpp/include/cudf/strings/slice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,20 @@ namespace strings {
* r2 is now ["lo","ob"]
* @endcode
*
* @param strings Strings column for this operation.
* @param start First character position to begin the substring.
* @param stop Last character position (exclusive) to end the substring.
* @param step Distance between input characters retrieved.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with sorted elements of this instance.
* @param input Strings column for this operation
* @param start First character position to begin the substring
* @param stop Last character position (exclusive) to end the substring
* @param step Distance between input characters retrieved
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column containing the requested substring of each input string
*/
std::unique_ptr<column> slice_strings(
strings_column_view const& strings,
strings_column_view const& input,
numeric_scalar<size_type> const& start = numeric_scalar<size_type>(0, false),
numeric_scalar<size_type> const& stop = numeric_scalar<size_type>(0, false),
numeric_scalar<size_type> const& step = numeric_scalar<size_type>(1),
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -95,16 +97,18 @@ std::unique_ptr<column> slice_strings(
* @throw cudf::logic_error if starts and stops are not same integer type.
* @throw cudf::logic_error if starts or stops contains nulls.
*
* @param strings Strings column for this operation.
* @param starts First character positions to begin the substring.
* @param stops Last character (exclusive) positions to end the substring.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with sorted elements of this instance.
* @param input Strings column for this operation
* @param starts First character positions to begin the substring
* @param stops Last character (exclusive) positions to end the substring
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column containing the requested substring of each input string
*/
std::unique_ptr<column> slice_strings(
strings_column_view const& strings,
strings_column_view const& input,
column_view const& starts,
column_view const& stops,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
4 changes: 3 additions & 1 deletion cpp/include/cudf/strings/strip.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -57,13 +57,15 @@ namespace strings {
* string; Default is both
* @param to_strip UTF-8 encoded characters to strip from each string;
* Default is empty string which indicates strip whitespace characters
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column
*/
std::unique_ptr<column> strip(
strings_column_view const& input,
side_type side = side_type::BOTH,
string_scalar const& to_strip = string_scalar(""),
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
14 changes: 8 additions & 6 deletions cpp/include/cudf/strings/wrap.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -57,14 +57,16 @@ namespace strings {
* wrapped_string_tbl = ["the quick\nbrown fox\njumped over\nthe lazy\nbrown dog", "hello, world"]
* ```
*
* @param[in] strings String column.
* @param[in] width Maximum character width of a line within each string.
* @param[in] mr Device memory resource used to allocate the returned column's device memory
* @return Column of wrapped strings.
* @param input String column
* @param width Maximum character width of a line within each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Column of wrapped strings
*/
std::unique_ptr<column> wrap(
strings_column_view const& strings,
strings_column_view const& input,
size_type width,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/strings/padding.cu
Original file line number Diff line number Diff line change
Expand Up @@ -168,18 +168,20 @@ std::unique_ptr<column> pad(strings_column_view const& input,
size_type width,
side_type side,
std::string_view fill_char,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::pad(input, width, side, fill_char, cudf::get_default_stream(), mr);
return detail::pad(input, width, side, fill_char, stream, mr);
}

std::unique_ptr<column> zfill(strings_column_view const& input,
size_type width,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::zfill(input, width, cudf::get_default_stream(), mr);
return detail::zfill(input, width, stream, mr);
}

} // namespace strings
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/strings/slice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -248,20 +248,21 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
numeric_scalar<size_type> const& start,
numeric_scalar<size_type> const& stop,
numeric_scalar<size_type> const& step,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::slice_strings(strings, start, stop, step, cudf::get_default_stream(), mr);
return detail::slice_strings(strings, start, stop, step, stream, mr);
}

std::unique_ptr<column> slice_strings(strings_column_view const& strings,
column_view const& starts_column,
column_view const& stops_column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::slice_strings(
strings, starts_column, stops_column, cudf::get_default_stream(), mr);
return detail::slice_strings(strings, starts_column, stops_column, stream, mr);
}

} // namespace strings
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/strings/strip.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -86,10 +86,11 @@ std::unique_ptr<column> strip(strings_column_view const& input,
std::unique_ptr<column> strip(strings_column_view const& input,
side_type side,
string_scalar const& to_strip,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::strip(input, side, to_strip, cudf::get_default_stream(), mr);
return detail::strip(input, side, to_strip, stream, mr);
}

} // namespace strings
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/strings/wrap.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,10 +19,9 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/case.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/wrap.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

Expand Down Expand Up @@ -133,10 +132,11 @@ std::unique_ptr<column> wrap(strings_column_view const& strings,

std::unique_ptr<column> wrap(strings_column_view const& strings,
size_type width,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::wrap<detail::execute_wrap>(strings, width, cudf::get_default_stream(), mr);
return detail::wrap<detail::execute_wrap>(strings, width, stream, mr);
}

} // namespace strings
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -633,8 +633,8 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing)
ConfigureTest(
STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE
testing
STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp
streams/strings/strings_tests.cpp STREAM_MODE testing
)
ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing)
Expand Down
71 changes: 71 additions & 0 deletions cpp/tests/streams/strings/strings_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/strings/padding.hpp>
#include <cudf/strings/slice.hpp>
#include <cudf/strings/strip.hpp>
#include <cudf/strings/wrap.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>

#include <string>

// Fixture shared by the strings stream tests below; adds nothing beyond
// what cudf::test::BaseFixture provides.
struct StringsTest : cudf::test::BaseFixture {
};

// Calls cudf::strings::strip() with an explicitly supplied stream.
// The returned column is discarded; only the call itself is exercised.
TEST_F(StringsTest, Strip)
{
  auto const stream = cudf::test::get_default_stream();

  cudf::test::strings_column_wrapper strings_col({" aBc ", " ", "aaaa ", "\tb"});
  cudf::strings_column_view const strings_view(strings_col);

  // Characters to remove are passed as a valid scalar created on the same stream.
  cudf::string_scalar const chars_to_strip(" ", true, stream);

  cudf::strings::strip(strings_view, cudf::strings::side_type::BOTH, chars_to_strip, stream);
}

// Calls cudf::strings::pad() and cudf::strings::zfill() with an explicitly
// supplied stream. The returned columns are discarded.
TEST_F(StringsTest, Pad)
{
  auto const stream = cudf::test::get_default_stream();

  cudf::test::strings_column_wrapper strings_col({"333", "", "4444", "1"});
  cudf::strings_column_view const strings_view(strings_col);

  // Same target width is used for both padding flavors.
  auto const width = 6;

  cudf::strings::pad(strings_view, width, cudf::strings::side_type::BOTH, " ", stream);
  cudf::strings::zfill(strings_view, width, stream);
}

// Calls cudf::strings::wrap() with an explicitly supplied stream.
// The returned column is discarded.
TEST_F(StringsTest, Wrap)
{
  auto const stream = cudf::test::get_default_stream();

  cudf::test::strings_column_wrapper strings_col({"the quick brown fox jumped"});
  cudf::strings_column_view const strings_view(strings_col);

  auto const width = 6;
  cudf::strings::wrap(strings_view, width, stream);
}

// Calls both cudf::strings::slice_strings() overloads (scalar bounds and
// per-row column bounds) with an explicitly supplied stream.
// The returned columns are discarded.
TEST_F(StringsTest, Slice)
{
  auto input = cudf::test::strings_column_wrapper({"hello", "these", "are test strings"});
  auto view  = cudf::strings_column_view(input);

  // Spell out the element type: the API takes numeric_scalar<size_type>, so
  // do not rely on class template argument deduction from an int literal
  // happening to produce the same type.
  auto start =
    cudf::numeric_scalar<cudf::size_type>(2, true, cudf::test::get_default_stream());
  auto stop =
    cudf::numeric_scalar<cudf::size_type>(5, true, cudf::test::get_default_stream());
  auto step =
    cudf::numeric_scalar<cudf::size_type>(1, true, cudf::test::get_default_stream());
  cudf::strings::slice_strings(view, start, stop, step, cudf::test::get_default_stream());

  // Per-row start/stop positions, one entry for each input string.
  auto starts = cudf::test::fixed_width_column_wrapper<cudf::size_type>({1, 2, 3});
  auto stops  = cudf::test::fixed_width_column_wrapper<cudf::size_type>({4, 5, 6});
  cudf::strings::slice_strings(view, starts, stops, cudf::test::get_default_stream());
}

0 comments on commit 5039d04

Please sign in to comment.