Skip to content

Commit 1046ece

Browse files
authored
Merge pull request #9697 from rakhmets/topic/fix-rkey-pack-crash-1.16
UCP/RNDV: Fixed rkey pack crash when mem reg failed - v1.16.x
2 parents 34d9966 + 52c1568 commit 1046ece

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

src/ucp/proto/proto_common.inl

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -353,19 +353,23 @@ ucp_proto_request_pack_rkey(ucp_request_t *req, ucp_md_map_t md_map,
353353
void *rkey_buffer)
354354
{
355355
const ucp_datatype_iter_t *dt_iter = &req->send.state.dt_iter;
356+
ucp_mem_h memh;
356357
ssize_t packed_rkey_size;
357358

358359
/* For contiguous buffer, pack one rkey
359360
* TODO to support IOV datatype write N [address+length] records,
360361
*/
361362
ucs_assertv(dt_iter->dt_class == UCP_DATATYPE_CONTIG, "dt_class=%s",
362363
ucp_datatype_class_names[dt_iter->dt_class]);
363-
ucs_assertv(ucs_test_all_flags(dt_iter->type.contig.memh->md_map, md_map),
364-
"dt_iter_md_map=0x%"PRIx64" md_map=0x%"PRIx64,
365-
dt_iter->type.contig.memh->md_map, md_map);
364+
365+
memh = dt_iter->type.contig.memh;
366+
if (!ucs_test_all_flags(memh->md_map, md_map)) {
367+
ucs_trace("dt_iter_md_map=0x%"PRIx64" md_map=0x%"PRIx64, memh->md_map,
368+
md_map);
369+
}
366370

367371
packed_rkey_size = ucp_rkey_pack_memh(
368-
req->send.ep->worker->context, md_map, dt_iter->type.contig.memh,
372+
req->send.ep->worker->context, md_map & memh->md_map, memh,
369373
dt_iter->type.contig.buffer, dt_iter->length, &dt_iter->mem_info,
370374
distance_dev_map, dev_distance,
371375
ucp_ep_config(req->send.ep)->uct_rkey_pack_flags, rkey_buffer);

src/ucp/rndv/rndv_rtr.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,8 @@ static ucs_status_t ucp_proto_rndv_rtr_progress(uct_pending_req_t *self)
181181
status = ucp_datatype_iter_mem_reg(req->send.ep->worker->context,
182182
&req->send.state.dt_iter,
183183
rpriv->super.md_map,
184-
UCT_MD_MEM_ACCESS_REMOTE_PUT,
184+
UCT_MD_MEM_ACCESS_REMOTE_PUT |
185+
UCT_MD_MEM_FLAG_HIDE_ERRORS,
185186
UCP_DT_MASK_ALL);
186187
if (status != UCS_OK) {
187188
ucp_proto_request_abort(req, status);

0 commit comments

Comments
 (0)