From 6fd2a41ee70e3c713b2257bbe2e6e811ce42bd15 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 6 Dec 2022 11:05:00 -0800 Subject: [PATCH] [v1.14.x] prov/rxm: Always use rendezvous protocol for ZE device memory send Bounce buffer copy overhead is high for ZE device memory. The rendezvous protocol takes advantage of GPU RDMA and performs better even for small messages. Signed-off-by: Jianxin Xiong (cherry-picked and adapted from commit 7efb4f8715f062a10b421672057b8d38dbde534e) --- prov/rxm/src/rxm_ep.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 1a8f0203a2c..1924e607e31 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -1525,6 +1525,10 @@ rxm_send_common(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn, (data_len > rxm_ep->rxm_info->tx_attr->inject_size)) || (data_len <= rxm_ep->rxm_info->tx_attr->inject_size)); + iface = rxm_mr_desc_to_hmem_iface_dev(desc, count, &device); + if (iface == FI_HMEM_ZE) + goto rndv_send; + if (data_len <= rxm_ep->eager_limit) { ret = rxm_send_eager(rxm_ep, rxm_conn, iov, desc, count, context, data, flags, tag, op, @@ -1534,8 +1538,7 @@ rxm_send_common(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn, context, data, flags, tag, op, data_len, rxm_ep_sar_calc_segs_cnt(rxm_ep, data_len)); } else { - iface = rxm_mr_desc_to_hmem_iface_dev(desc, count, &device); - +rndv_send: ret = rxm_alloc_rndv_buf(rxm_ep, rxm_conn, context, (uint8_t) count, iov, desc, data_len, data, flags, tag, op,