From 3e48743083b0df57128777968d018b4655d86d33 Mon Sep 17 00:00:00 2001 From: Joana Niermann Date: Thu, 14 Nov 2024 14:26:16 +0100 Subject: [PATCH] Use random, but sorted tracks for benchmarks and copy the detector to device --- tests/benchmarks/cpu/benchmark_propagator.cpp | 43 +++++++++++--- .../cuda/benchmark_propagator_cuda.cpp | 56 ++++++++++++++----- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/tests/benchmarks/cpu/benchmark_propagator.cpp b/tests/benchmarks/cpu/benchmark_propagator.cpp index ac3cc4cf6..02d151446 100644 --- a/tests/benchmarks/cpu/benchmark_propagator.cpp +++ b/tests/benchmarks/cpu/benchmark_propagator.cpp @@ -76,20 +76,50 @@ auto toy_cfg = toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false); void fill_tracks(vecmem::vector> &tracks, - const std::size_t theta_steps, const std::size_t phi_steps) { - // Set momentum of tracks - const scalar mom_mag{10.f * unit::GeV}; + const std::size_t n_tracks, bool do_sort = true) { + using scalar_t = dscalar; + using uniform_gen_t = + detail::random_numbers>; + using trk_generator_t = + random_track_generator, uniform_gen_t>; + + trk_generator_t::configuration trk_gen_cfg{}; + trk_gen_cfg.seed(42u); + trk_gen_cfg.n_tracks(n_tracks); + trk_gen_cfg.randomize_charge(true); + trk_gen_cfg.phi_range(-constant::pi, constant::pi); + trk_gen_cfg.eta_range(-3.f, 3.f); + trk_gen_cfg.mom_range(1.f * unit::GeV, + 100.f * unit::GeV); + trk_gen_cfg.origin({0.f, 0.f, 0.f}); + trk_gen_cfg.origin_stddev({0.f * unit::mm, + 0.f * unit::mm, + 0.f * unit::mm}); // Iterate through uniformly distributed momentum directions - for (auto traj : uniform_track_generator>( - phi_steps, theta_steps, mom_mag)) { + for (auto traj : trk_generator_t{trk_gen_cfg}) { tracks.push_back(traj); } + + if (do_sort) { + // Sort by theta angle + const auto traj_comp = [](const auto &lhs, const auto &rhs) { + constexpr auto pi_2{constant::pi_2}; + return math::fabs(pi_2 - getter::theta(lhs.dir())) < + math::fabs(pi_2 - getter::theta(rhs.dir())); + }; + + std::ranges::sort(tracks, traj_comp); + } } template static void BM_PROPAGATOR_CPU(benchmark::State &state) { + std::size_t n_tracks{static_cast(state.range(0)) * + static_cast(state.range(0))}; + // Create the toy geometry and bfield auto [det, names] = build_toy_detector(host_mr, toy_cfg); test::vector3 B{0.f, 0.f, 2.f * unit::T}; @@ -109,8 +139,7 @@ static void BM_PROPAGATOR_CPU(benchmark::State &state) { // Get tracks vecmem::vector> tracks(&host_mr); - fill_tracks(tracks, static_cast(state.range(0)), - static_cast(state.range(0))); + fill_tracks(tracks, n_tracks); total_tracks += tracks.size(); diff --git a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp b/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp index e6743514c..67745c8db 100644 --- a/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp +++ b/tests/benchmarks/cuda/benchmark_propagator_cuda.cpp @@ -35,30 +35,61 @@ auto toy_cfg = toy_det_config{}.n_brl_layers(4u).n_edc_layers(7u).do_check(false); void fill_tracks(vecmem::vector> &tracks, - const std::size_t theta_steps, const std::size_t phi_steps) { - // Set momentum of tracks - const scalar mom_mag{10.f * unit::GeV}; + const std::size_t n_tracks, bool do_sort = true) { + using scalar_t = dscalar; + using uniform_gen_t = + detail::random_numbers>; + using trk_generator_t = + random_track_generator, uniform_gen_t>; + + trk_generator_t::configuration trk_gen_cfg{}; + trk_gen_cfg.seed(42u); + trk_gen_cfg.n_tracks(n_tracks); + trk_gen_cfg.randomize_charge(true); + trk_gen_cfg.phi_range(-constant::pi, constant::pi); + trk_gen_cfg.eta_range(-3.f, 3.f); + trk_gen_cfg.mom_range(1.f * unit::GeV, + 100.f * unit::GeV); + trk_gen_cfg.origin({0.f, 0.f, 0.f}); + trk_gen_cfg.origin_stddev({0.f * unit::mm, + 0.f * unit::mm, + 0.f * unit::mm}); // Iterate through uniformly distributed momentum directions - for (auto traj : uniform_track_generator>( - phi_steps, theta_steps, mom_mag)) { + for (auto traj : trk_generator_t{trk_gen_cfg}) { tracks.push_back(traj); } + + if (do_sort) { + // Sort by theta angle + const auto traj_comp = [](const auto &lhs, const auto &rhs) { + constexpr auto pi_2{constant::pi_2}; + return math::fabs(pi_2 - getter::theta(lhs.dir())) < + math::fabs(pi_2 - getter::theta(rhs.dir())); + }; + + std::ranges::sort(tracks, traj_comp); + } } template static void BM_PROPAGATOR_CUDA(benchmark::State &state) { + std::size_t n_tracks{static_cast(state.range(0)) * + static_cast(state.range(0))}; + // Create the toy geometry - auto [det, names] = build_toy_detector(bp_mng_mr, toy_cfg); + auto [det, names] = build_toy_detector(host_mr, toy_cfg); test::vector3 B{0.f, 0.f, 2.f * unit::T}; auto bfield = bfield::create_const_field(B); - // Get detector data - auto det_data = detray::get_data(det); - // vecmem copy helper object - vecmem::cuda::copy copy; + vecmem::cuda::copy cuda_cpy; + + // Copy detector to device + auto det_buff = detray::get_buffer(det, dev_mr, cuda_cpy); + auto det_view = detray::get_data(det_buff); std::size_t total_tracks = 0; @@ -68,8 +99,7 @@ static void BM_PROPAGATOR_CUDA(benchmark::State &state) { // Get tracks vecmem::vector> tracks(&bp_mng_mr); - fill_tracks(tracks, static_cast(state.range(0)), - static_cast(state.range(0))); + fill_tracks(tracks, n_tracks); total_tracks += tracks.size(); @@ -79,7 +109,7 @@ static void BM_PROPAGATOR_CUDA(benchmark::State &state) { auto tracks_data = vecmem::get_data(tracks); // Run the propagator test for GPU device - propagator_benchmark(det_data, bfield, tracks_data, opt); + propagator_benchmark(det_view, bfield, tracks_data, opt); } state.counters["TracksPropagated"] = benchmark::Counter(