From a205781b4a3ce0c51ea0bafc0e9d552c0e45c90d Mon Sep 17 00:00:00 2001 From: Zach Dworkin Date: Fri, 21 Nov 2025 11:07:44 -0800 Subject: [PATCH] prov/rxm, prov/util: Update logic for source and dir_recv When updating the unspec queue address information, the peer_context's connection is sometimes NULL. This results in segmentation faults on av_insert, so checking for NULL before dereferencing it fixes the crash. However, this isn't a complete fix because it leaves the potential for an entry to get stuck in the wrong queue. The util_srx implementation is overcomplicated for the non-directed receive case. To simplify this, queuing will be limited to the unspecified unexpected queue when not using directed receive. This is because we do not need to enable the per-peer queues unless directed receive is requested. This makes sure that entries do not get stuck in the wrong queue because they all live in the same one. It also simplifies the lookup for the non-directed receive case. Note: We do not want to fix this by forcing rx_buf->conn to be set for all incoming messages. We want to avoid an extra lookup/set because the conn is only needed for the FI_SOURCE and FI_DIRECTED_RECV cases. 
Signed-off-by: Zach Dworkin --- prov/rxm/src/rxm_domain.c | 3 +++ prov/util/src/util_srx.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/prov/rxm/src/rxm_domain.c b/prov/rxm/src/rxm_domain.c index 39bfa24f96b..7c4ae9b99f8 100644 --- a/prov/rxm/src/rxm_domain.c +++ b/prov/rxm/src/rxm_domain.c @@ -225,6 +225,9 @@ static fi_addr_t rxm_get_addr(struct fi_peer_rx_entry *rx_entry) { struct rxm_rx_buf *rx_buf = rx_entry->peer_context; + if (!rx_buf->conn) + return rx_entry->addr; + return rx_buf->conn->peer->fi_addr; } diff --git a/prov/util/src/util_srx.c b/prov/util/src/util_srx.c index 3ed2527ce5b..ac45ccccade 100644 --- a/prov/util/src/util_srx.c +++ b/prov/util/src/util_srx.c @@ -346,7 +346,7 @@ static int util_queue_msg(struct fi_peer_rx_entry *rx_entry) util_entry = container_of(rx_entry, struct util_rx_entry, peer_entry); assert(util_entry->status == RX_ENTRY_UNEXP); - if (rx_entry->addr == FI_ADDR_UNSPEC) { + if (!srx_ctx->dir_recv || rx_entry->addr == FI_ADDR_UNSPEC) { dlist_insert_tail(&util_entry->d_entry, &srx_ctx->unspec_unexp_msg_queue); } else { @@ -371,7 +371,7 @@ static int util_queue_tag(struct fi_peer_rx_entry *rx_entry) util_entry = container_of(rx_entry, struct util_rx_entry, peer_entry); assert(util_entry->status == RX_ENTRY_UNEXP); - if (rx_entry->addr == FI_ADDR_UNSPEC) { + if (!srx_ctx->dir_recv || rx_entry->addr == FI_ADDR_UNSPEC) { dlist_insert_tail(&util_entry->d_entry, &srx_ctx->unspec_unexp_tag_queue); } else {