From 307743c7e58981823375cc229bc29497f9f05c90 Mon Sep 17 00:00:00 2001 From: Alexey Novikov Date: Thu, 6 Nov 2025 02:41:58 +0000 Subject: [PATCH] fabtests/benchmarks: Separate warmup and measurement phases in RMA bandwidth tests - Add is_warmup parameter to bandwidth_rma() function to distinguish between warmup and measurement phases - Add explicit warmup run before the main benchmark loop in rma_bw.c - Ensure accurate bandwidth measurements by isolating warmup overhead - Add warmup for server->client connection for `writedata` mode - Skip perf stats printing for warmup pass This change improves the accuracy of RMA bandwidth measurements by clearly separating the warmup phase from the actual performance measurement phase. Signed-off-by: Alexey Novikov --- fabtests/benchmarks/benchmark_shared.c | 28 ++++++++++++++++++++++---- fabtests/benchmarks/benchmark_shared.h | 2 +- fabtests/benchmarks/rma_bw.c | 8 ++++++-- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/fabtests/benchmarks/benchmark_shared.c b/fabtests/benchmarks/benchmark_shared.c index bace77c01d1..69886e10cd8 100644 --- a/fabtests/benchmarks/benchmark_shared.c +++ b/fabtests/benchmarks/benchmark_shared.c @@ -658,8 +658,9 @@ static int bw_rma_comp(enum ft_rma_opcodes rma_op, int num_completions) return ret; } -int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote) +int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote, bool is_warmup) { + const int num_iterations = is_warmup ? opts.warmup_iterations : opts.iterations; int ret, i, j; uint64_t flags = 0; size_t offset, inject_size = fi->tx_attr->inject_size; @@ -688,9 +689,8 @@ int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote) offset_rma_start = FT_RMA_SYNC_MSG_BYTES + MAX(ft_tx_prefix_size(), ft_rx_prefix_size()); - for (i = j = 0; i < opts.iterations + opts.warmup_iterations; i++) { - if (i == opts.warmup_iterations) - ft_start(); + ft_start(); + for (i = j = 0; i < num_iterations; i++) { if (j == 0) { offset = offset_rma_start; if (ft_check_opts(FT_OPT_VERIFY_DATA) && opts.transfer_size > 0) { @@ -726,6 +726,16 @@ int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote) break; case FT_RMA_WRITEDATA: if (!opts.dst_addr) { + if (is_warmup) { + ret = ft_post_tx( + ep, + remote_fi_addr, + FT_RMA_SYNC_MSG_BYTES, + NO_CQ_DATA, + &tx_ctx); + if (ret) + return ret; + } if (fi->rx_attr->mode & FI_RX_CQ_DATA) ret = ft_post_rx(ep, 0, &rx_ctx_arr[j].context); else @@ -736,6 +746,14 @@ int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote) rx_seq++; } else { + if (is_warmup) { + ret = ft_post_rx( + ep, + FT_RMA_SYNC_MSG_BYTES, + &rx_ctx); + if (ret) + return ret; + } if (opts.transfer_size <= inject_size) { ret = ft_post_rma_inject(FT_RMA_WRITEDATA, tx_buf + offset, @@ -780,6 +798,8 @@ int bandwidth_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote) return ret; ft_stop(); + if (is_warmup) + return 0; if (opts.machr) show_perf_mr(opts.transfer_size, opts.iterations, &start, &end, 1, opts.argc, opts.argv); diff --git a/fabtests/benchmarks/benchmark_shared.h b/fabtests/benchmarks/benchmark_shared.h index 6cd29d5977f..c0df50fcd26 100644 --- a/fabtests/benchmarks/benchmark_shared.h +++ b/fabtests/benchmarks/benchmark_shared.h @@ -49,7 +49,7 @@ int pingpong(void); int run_pingpong(void); int bandwidth(void); int pingpong_rma(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote); -int bandwidth_rma(enum ft_rma_opcodes op, struct fi_rma_iov *remote); +int bandwidth_rma(enum ft_rma_opcodes op, struct fi_rma_iov *remote, bool is_warmup); int rma_tx_completion(enum ft_rma_opcodes rma_op, struct fi_rma_iov *remote); #ifdef __cplusplus diff --git a/fabtests/benchmarks/rma_bw.c b/fabtests/benchmarks/rma_bw.c index 7d0764f5267..c8e809430df 100644 --- a/fabtests/benchmarks/rma_bw.c +++ b/fabtests/benchmarks/rma_bw.c @@ -58,19 +58,23 @@ static int run(void) if (ret) return ret; + ret = bandwidth_rma(opts.rma_op, &remote, true); + if (ret) + goto out; + if (!(opts.options & FT_OPT_SIZE)) { for (i = 0; i < TEST_CNT; i++) { if (!ft_use_size(i, opts.sizes_enabled)) continue; opts.transfer_size = test_size[i].size; init_test(&opts, test_name, sizeof(test_name)); - ret = bandwidth_rma(opts.rma_op, &remote); + ret = bandwidth_rma(opts.rma_op, &remote, false); if (ret) goto out; } } else { init_test(&opts, test_name, sizeof(test_name)); - ret = bandwidth_rma(opts.rma_op, &remote); + ret = bandwidth_rma(opts.rma_op, &remote, false); if (ret) goto out; }