Skip to content

Commit 03c055f

Browse files
authored
Move strings replace benchmarks to nvbench (#17301)
Move the `cpp/benchmarks/string/replace.cpp` implementation from google-benchmark to nvbench.

This covers the strings replace APIs:
- `cudf::strings::replace` (scalar version)
- `cudf::strings::replace_multiple` (column version)
- `cudf::strings::replace_slice`

Authors:
- David Wendt (https://github.com/davidwendt)

Approvers:
- Yunsong Wang (https://github.com/PointKernel)
- Shruti Shivakumar (https://github.com/shrshi)

URL: #17301
1 parent c7bfa77 commit 03c055f

File tree

2 files changed

+40
-52
lines changed

2 files changed

+40
-52
lines changed

cpp/benchmarks/CMakeLists.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,9 +354,7 @@ ConfigureNVBench(
354354

355355
# ##################################################################################################
356356
# * strings benchmark -------------------------------------------------------------------
357-
ConfigureBench(
358-
STRINGS_BENCH string/factory.cu string/repeat_strings.cpp string/replace.cpp string/url_decode.cu
359-
)
357+
ConfigureBench(STRINGS_BENCH string/factory.cu string/repeat_strings.cpp string/url_decode.cu)
360358

361359
ConfigureNVBench(
362360
STRINGS_NVBENCH
@@ -380,6 +378,7 @@ ConfigureNVBench(
380378
string/lengths.cpp
381379
string/like.cpp
382380
string/make_strings_column.cu
381+
string/replace.cpp
383382
string/replace_re.cpp
384383
string/reverse.cpp
385384
string/slice.cpp

cpp/benchmarks/string/replace.cpp

Lines changed: 38 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,8 @@
1414
* limitations under the License.
1515
*/
1616

17-
#include "string_bench_args.hpp"
18-
1917
#include <benchmarks/common/generate_input.hpp>
2018
#include <benchmarks/fixture/benchmark_fixture.hpp>
21-
#include <benchmarks/synchronization/synchronization.hpp>
2219

2320
#include <cudf_test/column_wrapper.hpp>
2421

@@ -27,59 +24,51 @@
2724
#include <cudf/strings/strings_column_view.hpp>
2825
#include <cudf/utilities/default_stream.hpp>
2926

30-
#include <limits>
31-
32-
class StringReplace : public cudf::benchmark {};
27+
#include <nvbench/nvbench.cuh>
3328

3429
enum replace_type { scalar, slice, multi };
3530

36-
static void BM_replace(benchmark::State& state, replace_type rt)
31+
static void bench_replace(nvbench::state& state)
3732
{
38-
cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
39-
cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))};
33+
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
34+
auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
35+
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
36+
auto const api = state.get_string("api");
37+
4038
data_profile const profile = data_profile_builder().distribution(
41-
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
42-
auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
43-
cudf::strings_column_view input(column->view());
44-
cudf::string_scalar target("+");
45-
cudf::string_scalar repl("");
46-
cudf::test::strings_column_wrapper targets({"+", "-"});
47-
cudf::test::strings_column_wrapper repls({"", ""});
39+
cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
40+
auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
4841

49-
for (auto _ : state) {
50-
cuda_event_timer raii(state, true, cudf::get_default_stream());
51-
switch (rt) {
52-
case scalar: cudf::strings::replace(input, target, repl); break;
53-
case slice: cudf::strings::replace_slice(input, repl, 1, 10); break;
54-
case multi:
55-
cudf::strings::replace_multiple(
56-
input, cudf::strings_column_view(targets), cudf::strings_column_view(repls));
57-
break;
58-
}
59-
}
42+
cudf::strings_column_view input(column->view());
6043

61-
state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream()));
62-
}
44+
auto stream = cudf::get_default_stream();
45+
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
46+
auto const chars_size = input.chars_size(stream);
47+
state.add_global_memory_reads<nvbench::int8_t>(chars_size);
48+
state.add_global_memory_writes<nvbench::int8_t>(chars_size);
6349

64-
static void generate_bench_args(benchmark::internal::Benchmark* b)
65-
{
66-
int const min_rows = 1 << 12;
67-
int const max_rows = 1 << 24;
68-
int const row_mult = 8;
69-
int const min_rowlen = 1 << 5;
70-
int const max_rowlen = 1 << 13;
71-
int const len_mult = 2;
72-
generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
50+
if (api == "scalar") {
51+
cudf::string_scalar target("+");
52+
cudf::string_scalar repl("-");
53+
state.exec(nvbench::exec_tag::sync,
54+
[&](nvbench::launch& launch) { cudf::strings::replace(input, target, repl); });
55+
} else if (api == "multi") {
56+
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
57+
cudf::test::strings_column_wrapper targets({"+", " "});
58+
cudf::test::strings_column_wrapper repls({"-", "_"});
59+
cudf::strings::replace_multiple(
60+
input, cudf::strings_column_view(targets), cudf::strings_column_view(repls));
61+
});
62+
} else if (api == "slice") {
63+
cudf::string_scalar repl("0123456789");
64+
state.exec(nvbench::exec_tag::sync,
65+
[&](nvbench::launch& launch) { cudf::strings::replace_slice(input, repl, 1, 10); });
66+
}
7367
}
7468

75-
#define STRINGS_BENCHMARK_DEFINE(name) \
76-
BENCHMARK_DEFINE_F(StringReplace, name) \
77-
(::benchmark::State & st) { BM_replace(st, replace_type::name); } \
78-
BENCHMARK_REGISTER_F(StringReplace, name) \
79-
->Apply(generate_bench_args) \
80-
->UseManualTime() \
81-
->Unit(benchmark::kMillisecond);
82-
83-
STRINGS_BENCHMARK_DEFINE(scalar)
84-
STRINGS_BENCHMARK_DEFINE(slice)
85-
STRINGS_BENCHMARK_DEFINE(multi)
69+
NVBENCH_BENCH(bench_replace)
70+
.set_name("replace")
71+
.add_int64_axis("min_width", {0})
72+
.add_int64_axis("max_width", {32, 64, 128, 256})
73+
.add_int64_axis("num_rows", {32768, 262144, 2097152})
74+
.add_string_axis("api", {"scalar", "multi", "slice"});

0 commit comments

Comments
 (0)