Skip to content

Commit 2b51e42

Browse files
soumagne authored and j-xiong committed
prov/util: fix FI_MULTI_RECV not set on FI_ECANCELED
When canceling a multi-recv entry, FI_MULTI_RECV was never set as part of the error entry flags. Fix so that the user can determine if the buffer is still in use. Return -FI_EAGAIN on cancel if multi-recv buffer is in use. Update fi_endpoint man page to describe fi_cancel() return value. Signed-off-by: Jerome Soumagne <[email protected]>
1 parent 9fbd8b1 commit 2b51e42

File tree

2 files changed

+31
-13
lines changed

2 files changed

+31
-13
lines changed

man/fi_endpoint.3.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1704,7 +1704,9 @@ required by the application.
17041704
17051705
Returns 0 on success. On error, a negative value corresponding to
17061706
fabric errno is returned. For fi_cancel, a return value of 0
1707-
indicates that the cancel request was submitted for processing.
1707+
indicates that the cancel request was submitted for processing;
1708+
a return value of -FI_EAGAIN indicates that the request could not be
1709+
submitted and that it should be retried once progress has been made.
17081710
For fi_setopt/fi_getopt, a return value of -FI_ENOPROTOOPT
17091711
indicates the provider does not support the requested option.
17101712

prov/util/src/util_srx.c

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,10 @@ static int util_cancel_entry(struct util_srx_ctx *srx, uint64_t flags,
915915

916916
err_entry.op_context = rx_entry->peer_entry.context;
917917
err_entry.flags = flags;
918+
if (rx_entry->peer_entry.flags & FI_MULTI_RECV) {
919+
assert(rx_entry->multi_recv_ref == 0);
920+
err_entry.flags |= FI_MULTI_RECV;
921+
}
918922
err_entry.tag = rx_entry->peer_entry.tag;
919923
err_entry.err = FI_ECANCELED;
920924
err_entry.prov_errno = -FI_ECANCELED;
@@ -1027,8 +1031,8 @@ static struct fi_ops util_srx_fid_ops = {
10271031
.ops_open = fi_no_ops_open,
10281032
};
10291033

1030-
static bool util_cancel_recv(struct util_srx_ctx *srx, struct slist *queue,
1031-
uint64_t flags, void *context)
1034+
static int util_cancel_recv(struct util_srx_ctx *srx, struct slist *queue,
1035+
uint64_t flags, void *context)
10321036
{
10331037
struct slist_entry *item, *prev;
10341038
struct util_rx_entry *rx_entry;
@@ -1037,12 +1041,19 @@ static bool util_cancel_recv(struct util_srx_ctx *srx, struct slist *queue,
10371041
slist_foreach(queue, item, prev) {
10381042
rx_entry = container_of(item, struct util_rx_entry, peer_entry);
10391043
if (rx_entry->peer_entry.context == context) {
1044+
/* With multi-recv, cancellation can only be processed
1045+
* if the multi-recv buffer is no longer in use. If the
1046+
* buffer is still in use, return EAGAIN back for user
1047+
* to retry cancel request. */
1048+
if (rx_entry->peer_entry.flags & FI_MULTI_RECV &&
1049+
rx_entry->multi_recv_ref)
1050+
return -FI_EAGAIN;
10401051
slist_remove(queue, item, prev);
10411052
util_cancel_entry(srx, flags, rx_entry);
1042-
return true;
1053+
return FI_SUCCESS;
10431054
}
10441055
}
1045-
return false;
1056+
return -FI_ENOENT;
10461057
}
10471058

10481059
static int util_cancel_src(struct ofi_dyn_arr *arr, void *list, void *context)
@@ -1056,31 +1067,36 @@ static int util_cancel_src(struct ofi_dyn_arr *arr, void *list, void *context)
10561067
flags = arr == &srx->src_trecv_queues ?
10571068
FI_TAGGED | FI_RECV : FI_MSG | FI_RECV;
10581069

1059-
return (int) util_cancel_recv(srx, queue, flags, context);
1070+
return (int) (util_cancel_recv(srx, queue, flags, context) == FI_SUCCESS);
10601071
}
10611072

10621073
static ssize_t util_srx_cancel(fid_t ep_fid, void *context)
10631074
{
10641075
struct util_srx_ctx *srx;
1076+
ssize_t ret;
10651077

10661078
srx = container_of(ep_fid, struct util_srx_ctx, peer_srx.ep_fid);
10671079

10681080
ofi_genlock_lock(srx->lock);
1069-
if (util_cancel_recv(srx, &srx->tag_queue, FI_TAGGED | FI_RECV,
1070-
context))
1081+
ret = util_cancel_recv(srx, &srx->tag_queue, FI_TAGGED | FI_RECV,
1082+
context);
1083+
if (ret != -FI_ENOENT)
10711084
goto out;
10721085

1073-
if (util_cancel_recv(srx, &srx->msg_queue, FI_MSG | FI_RECV, context))
1086+
ret = util_cancel_recv(srx, &srx->msg_queue, FI_MSG | FI_RECV, context);
1087+
if (ret != -FI_ENOENT)
10741088
goto out;
10751089

1076-
if (ofi_array_iter(&srx->src_trecv_queues, context, util_cancel_src))
1077-
goto out;
1090+
if (ofi_array_iter(&srx->src_trecv_queues, context, util_cancel_src) ||
1091+
ofi_array_iter(&srx->src_recv_queues, context, util_cancel_src)) {
1092+
/* nothing to do, always return success */
1093+
}
10781094

1079-
(void) ofi_array_iter(&srx->src_recv_queues, context, util_cancel_src);
1095+
ret = FI_SUCCESS;
10801096

10811097
out:
10821098
ofi_genlock_unlock(srx->lock);
1083-
return FI_SUCCESS;
1099+
return ret;
10841100
}
10851101

10861102
static int util_srx_getopt(fid_t fid, int level, int optname,

0 commit comments

Comments
 (0)