Skip to content

Commit 4d547f2

Browse files
committed
ch4/{ofi,ucx}: Skip collective finalize in sessions model
If there is no MPI_COMM_WORLD (i.e., MPI was initialized only through the sessions model), we should skip the global PMI barrier during finalization. In that case it is not guaranteed that all world processes have initialized MPI (and thus PMI), so a global barrier could hang.
1 parent a36d516 commit 4d547f2

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

src/mpid/ch4/netmod/ofi/ofi_init.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -925,7 +925,7 @@ int MPIDI_OFI_mpi_finalize_hook(void)
925925
/* Destroy RMA key allocator */
926926
MPIDI_OFI_mr_key_allocator_destroy();
927927

928-
if (MPIR_CVAR_NO_COLLECTIVE_FINALIZE) {
928+
if (!MPIR_Process.comm_world || MPIR_CVAR_NO_COLLECTIVE_FINALIZE) {
929929
/* skip collective work arounds */
930930
} else if (strcmp("verbs;ofi_rxm", MPIDI_OFI_global.prov_use[0]->fabric_attr->prov_name) == 0
931931
|| strcmp("psm2", MPIDI_OFI_global.prov_use[0]->fabric_attr->prov_name) == 0

src/mpid/ch4/netmod/ucx/ucx_init.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ int MPIDI_UCX_mpi_finalize_hook(void)
243243
int n = MPIDI_UCX_global.num_vcis;
244244
pending = MPL_malloc(sizeof(ucs_status_ptr_t) * MPIR_Process.size * n * n, MPL_MEM_OTHER);
245245

246-
if (!MPIR_CVAR_NO_COLLECTIVE_FINALIZE) {
246+
if (MPIR_Process.comm_world && !MPIR_CVAR_NO_COLLECTIVE_FINALIZE) {
247247
/* if some process are not present, the disconnect may timeout and give errors */
248248
mpi_errno = MPIR_pmi_barrier();
249249
MPIR_ERR_CHECK(mpi_errno);

0 commit comments

Comments
 (0)