Skip to content

Commit 52f7d5c

Browse files
authored
Expose stream parameter in public strings filter APIs (#14293)
Add stream parameter to public APIs:
- `cudf::strings::translate()`
- `cudf::strings::filter_characters()`
- `cudf::strings::filter_characters_of_type()`
- `cudf::strings::all_characters_of_type()`
- `cudf::strings::reverse()`

Also cleaned up some of the doxygen comments.

Reference #13744

Authors:
- David Wendt (https://github.com/davidwendt)
- https://github.com/shrshi

Approvers:
- Bradley Dice (https://github.com/bdice)
- https://github.com/shrshi

URL: #14293
1 parent 7d6c377 commit 52f7d5c

File tree

10 files changed

+162
-35
lines changed

10 files changed

+162
-35
lines changed

cpp/include/cudf/strings/char_types/char_types.hpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -53,18 +53,20 @@ namespace strings {
5353
*
5454
* Any null row results in a null entry for that row in the output column.
5555
*
56-
* @param strings Strings instance for this operation.
57-
* @param types The character types to check in each string.
56+
* @param input Strings instance for this operation
57+
* @param types The character types to check in each string
5858
* @param verify_types Only verify against these character types.
5959
* Default `ALL_TYPES` means return `true`
6060
* iff all characters match `types`.
61-
* @param mr Device memory resource used to allocate the returned column's device memory.
62-
* @return New column of boolean results for each string.
61+
* @param stream CUDA stream used for device memory operations and kernel launches
62+
* @param mr Device memory resource used to allocate the returned column's device memory
63+
* @return New column of boolean results for each string
6364
*/
6465
std::unique_ptr<column> all_characters_of_type(
65-
strings_column_view const& strings,
66+
strings_column_view const& input,
6667
string_character_types types,
6768
string_character_types verify_types = string_character_types::ALL_TYPES,
69+
rmm::cuda_stream_view stream = cudf::get_default_stream(),
6870
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
6971

7072
/**
@@ -96,20 +98,22 @@ std::unique_ptr<column> all_characters_of_type(
9698
* @throw cudf::logic_error if neither or both `types_to_remove` and
9799
* `types_to_keep` are set to `ALL_TYPES`.
98100
*
99-
* @param strings Strings instance for this operation.
101+
* @param input Strings instance for this operation
100102
* @param types_to_remove The character types to check in each string.
101103
* Use `ALL_TYPES` here to specify `types_to_keep` instead.
102-
* @param replacement The replacement character to use when removing characters.
104+
* @param replacement The replacement character to use when removing characters
103105
* @param types_to_keep Default `ALL_TYPES` means all characters of
104106
* `types_to_remove` will be filtered.
105-
* @param mr Device memory resource used to allocate the returned column's device memory.
106-
* @return New column of boolean results for each string.
107+
* @param stream CUDA stream used for device memory operations and kernel launches
108+
* @param mr Device memory resource used to allocate the returned column's device memory
109+
* @return New strings column with the filtered characters removed or replaced
107110
*/
108111
std::unique_ptr<column> filter_characters_of_type(
109-
strings_column_view const& strings,
112+
strings_column_view const& input,
110113
string_character_types types_to_remove,
111114
string_scalar const& replacement = string_scalar(""),
112115
string_character_types types_to_keep = string_character_types::ALL_TYPES,
116+
rmm::cuda_stream_view stream = cudf::get_default_stream(),
113117
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
114118

115119
/** @} */ // end of doxygen group

cpp/include/cudf/strings/reverse.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -42,10 +42,12 @@ namespace strings {
4242
*
4343
* @param input Strings column for this operation
4444
* @param mr Device memory resource used to allocate the returned column's device memory
45+
* @param stream CUDA stream used for device memory operations and kernel launches
4546
* @return New strings column
4647
*/
4748
std::unique_ptr<column> reverse(
4849
strings_column_view const& input,
50+
rmm::cuda_stream_view stream = cudf::get_default_stream(),
4951
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
5052

5153
/** @} */ // end of doxygen group

cpp/include/cudf/strings/translate.hpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -47,14 +47,16 @@ namespace strings {
4747
* r is now ["AA", "", "cccc", "AcQ"]
4848
* @endcode
4949
*
50-
* @param strings Strings instance for this operation.
51-
* @param chars_table Table of UTF-8 character mappings.
52-
* @param mr Device memory resource used to allocate the returned column's device memory.
53-
* @return New column with padded strings.
50+
* @param input Strings instance for this operation
51+
* @param chars_table Table of UTF-8 character mappings
52+
* @param stream CUDA stream used for device memory operations and kernel launches
53+
* @param mr Device memory resource used to allocate the returned column's device memory
54+
* @return New column with translated strings
5455
*/
5556
std::unique_ptr<column> translate(
56-
strings_column_view const& strings,
57+
strings_column_view const& input,
5758
std::vector<std::pair<char_utf8, char_utf8>> const& chars_table,
59+
rmm::cuda_stream_view stream = cudf::get_default_stream(),
5860
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
5961

6062
/**
@@ -87,19 +89,21 @@ enum class filter_type : bool {
8789
*
8890
* @throw cudf::logic_error if `replacement` is invalid
8991
*
90-
* @param strings Strings instance for this operation.
91-
* @param characters_to_filter Table of character ranges to filter on.
92+
* @param input Strings instance for this operation
93+
* @param characters_to_filter Table of character ranges to filter on
9294
* @param keep_characters If true, the `characters_to_filter` are retained and all other characters
93-
* are removed.
94-
* @param replacement Optional replacement string for each character removed.
95-
* @param mr Device memory resource used to allocate the returned column's device memory.
96-
* @return New column with filtered strings.
95+
* are removed
96+
* @param replacement Optional replacement string for each character removed
97+
* @param stream CUDA stream used for device memory operations and kernel launches
98+
* @param mr Device memory resource used to allocate the returned column's device memory
99+
* @return New column with filtered strings
97100
*/
98101
std::unique_ptr<column> filter_characters(
99-
strings_column_view const& strings,
102+
strings_column_view const& input,
100103
std::vector<std::pair<cudf::char_utf8, cudf::char_utf8>> characters_to_filter,
101104
filter_type keep_characters = filter_type::KEEP,
102105
string_scalar const& replacement = string_scalar(""),
106+
rmm::cuda_stream_view stream = cudf::get_default_stream(),
103107
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
104108

105109
/** @} */ // end of doxygen group

cpp/src/strings/char_types/char_types.cu

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -214,25 +214,26 @@ std::unique_ptr<column> filter_characters_of_type(strings_column_view const& str
214214

215215
// external API
216216

217-
std::unique_ptr<column> all_characters_of_type(strings_column_view const& strings,
217+
std::unique_ptr<column> all_characters_of_type(strings_column_view const& input,
218218
string_character_types types,
219219
string_character_types verify_types,
220+
rmm::cuda_stream_view stream,
220221
rmm::mr::device_memory_resource* mr)
221222
{
222223
CUDF_FUNC_RANGE();
223-
return detail::all_characters_of_type(
224-
strings, types, verify_types, cudf::get_default_stream(), mr);
224+
return detail::all_characters_of_type(input, types, verify_types, stream, mr);
225225
}
226226

227-
std::unique_ptr<column> filter_characters_of_type(strings_column_view const& strings,
227+
std::unique_ptr<column> filter_characters_of_type(strings_column_view const& input,
228228
string_character_types types_to_remove,
229229
string_scalar const& replacement,
230230
string_character_types types_to_keep,
231+
rmm::cuda_stream_view stream,
231232
rmm::mr::device_memory_resource* mr)
232233
{
233234
CUDF_FUNC_RANGE();
234235
return detail::filter_characters_of_type(
235-
strings, types_to_remove, replacement, types_to_keep, cudf::get_default_stream(), mr);
236+
input, types_to_remove, replacement, types_to_keep, stream, mr);
236237
}
237238

238239
} // namespace strings

cpp/src/strings/filter_chars.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,16 @@ std::unique_ptr<column> filter_characters(
154154
* @copydoc cudf::strings::filter_characters
155155
*/
156156
std::unique_ptr<column> filter_characters(
157-
strings_column_view const& strings,
157+
strings_column_view const& input,
158158
std::vector<std::pair<cudf::char_utf8, cudf::char_utf8>> characters_to_filter,
159159
filter_type keep_characters,
160160
string_scalar const& replacement,
161+
rmm::cuda_stream_view stream,
161162
rmm::mr::device_memory_resource* mr)
162163
{
163164
CUDF_FUNC_RANGE();
164165
return detail::filter_characters(
165-
strings, characters_to_filter, keep_characters, replacement, cudf::get_default_stream(), mr);
166+
input, characters_to_filter, keep_characters, replacement, stream, mr);
166167
}
167168

168169
} // namespace strings

cpp/src/strings/reverse.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,11 @@ std::unique_ptr<column> reverse(strings_column_view const& input,
7979
} // namespace detail
8080

8181
std::unique_ptr<column> reverse(strings_column_view const& input,
82+
rmm::cuda_stream_view stream,
8283
rmm::mr::device_memory_resource* mr)
8384
{
8485
CUDF_FUNC_RANGE();
85-
return detail::reverse(input, cudf::get_default_stream(), mr);
86+
return detail::reverse(input, stream, mr);
8687
}
8788

8889
} // namespace strings

cpp/src/strings/translate.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,13 @@ std::unique_ptr<column> translate(strings_column_view const& strings,
124124

125125
// external APIs
126126

127-
std::unique_ptr<column> translate(strings_column_view const& strings,
127+
std::unique_ptr<column> translate(strings_column_view const& input,
128128
std::vector<std::pair<uint32_t, uint32_t>> const& chars_table,
129+
rmm::cuda_stream_view stream,
129130
rmm::mr::device_memory_resource* mr)
130131
{
131132
CUDF_FUNC_RANGE();
132-
return detail::translate(strings, chars_table, cudf::get_default_stream(), mr);
133+
return detail::translate(input, chars_table, stream, mr);
133134
}
134135

135136
} // namespace strings

cpp/tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,8 +641,10 @@ ConfigureTest(
641641
streams/strings/contains_test.cpp
642642
streams/strings/convert_test.cpp
643643
streams/strings/extract_test.cpp
644+
streams/strings/filter_test.cpp
644645
streams/strings/find_test.cpp
645646
streams/strings/replace_test.cpp
647+
streams/strings/reverse_test.cpp
646648
streams/strings/split_test.cpp
647649
streams/strings/strings_tests.cpp
648650
STREAM_MODE
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Copyright (c) 2023, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <cudf/strings/char_types/char_types.hpp>
18+
#include <cudf/strings/translate.hpp>
19+
20+
#include <cudf_test/base_fixture.hpp>
21+
#include <cudf_test/column_wrapper.hpp>
22+
#include <cudf_test/default_stream.hpp>
23+
24+
#include <string>
25+
#include <vector>
26+
27+
class StringsFilterTest : public cudf::test::BaseFixture {};
28+
29+
static std::pair<cudf::char_utf8, cudf::char_utf8> make_entry(char const* from, char const* to)
30+
{
31+
cudf::char_utf8 in = 0;
32+
cudf::char_utf8 out = 0;
33+
cudf::strings::detail::to_char_utf8(from, in);
34+
if (to) cudf::strings::detail::to_char_utf8(to, out);
35+
return std::pair(in, out);
36+
}
37+
38+
TEST_F(StringsFilterTest, Translate)
39+
{
40+
auto input = cudf::test::strings_column_wrapper({" aBc ", " ", "aaaa ", "\tb"});
41+
auto view = cudf::strings_column_view(input);
42+
43+
std::vector<std::pair<cudf::char_utf8, cudf::char_utf8>> translate_table{
44+
make_entry("b", 0), make_entry("a", "A"), make_entry(" ", "_")};
45+
cudf::strings::translate(view, translate_table, cudf::test::get_default_stream());
46+
}
47+
48+
TEST_F(StringsFilterTest, Filter)
49+
{
50+
auto input = cudf::test::strings_column_wrapper({" aBc ", " ", "aaaa ", "\tb"});
51+
auto view = cudf::strings_column_view(input);
52+
53+
std::vector<std::pair<cudf::char_utf8, cudf::char_utf8>> filter_table{
54+
make_entry("b", 0), make_entry("a", "A"), make_entry(" ", "_")};
55+
56+
auto const repl = cudf::string_scalar("X", true, cudf::test::get_default_stream());
57+
auto const keep = cudf::strings::filter_type::KEEP;
58+
cudf::strings::filter_characters(
59+
view, filter_table, keep, repl, cudf::test::get_default_stream());
60+
}
61+
62+
TEST_F(StringsFilterTest, FilterTypes)
63+
{
64+
auto input = cudf::test::strings_column_wrapper({" aBc ", " ", "aaaa ", "\tb"});
65+
auto view = cudf::strings_column_view(input);
66+
67+
auto const verify_types =
68+
cudf::strings::string_character_types::LOWER | cudf::strings::string_character_types::UPPER;
69+
auto const all_types = cudf::strings::string_character_types::ALL_TYPES;
70+
cudf::strings::all_characters_of_type(
71+
view, verify_types, all_types, cudf::test::get_default_stream());
72+
73+
auto const repl = cudf::string_scalar("X", true, cudf::test::get_default_stream());
74+
auto const space_types = cudf::strings::string_character_types::SPACE;
75+
cudf::strings::filter_characters_of_type(
76+
view, all_types, repl, space_types, cudf::test::get_default_stream());
77+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright (c) 2023, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <cudf/strings/reverse.hpp>
18+
19+
#include <cudf_test/base_fixture.hpp>
20+
#include <cudf_test/column_wrapper.hpp>
21+
#include <cudf_test/default_stream.hpp>
22+
23+
#include <string>
24+
#include <vector>
25+
26+
class StringsReverseTest : public cudf::test::BaseFixture {};
27+
28+
TEST_F(StringsReverseTest, Reverse)
29+
{
30+
auto input = cudf::test::strings_column_wrapper({"aBcdef", " ", "12345"});
31+
auto view = cudf::strings_column_view(input);
32+
33+
cudf::strings::reverse(view, cudf::test::get_default_stream());
34+
}

0 commit comments

Comments
 (0)