Skip to content

Commit dad7d07

Browse files
authored
Merge pull request #7585 from raffenet/4.3.x-hcoll
[4.3.x] Fix hcoll support

Approved-by: Hui Zhou <[email protected]>
2 parents 2784623 + 56e778a commit dad7d07

File tree

3 files changed

+18
-20
lines changed

3 files changed

+18
-20
lines changed

src/mpid/ch4/netmod/ucx/ucx_comm.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ int MPIDI_UCX_mpi_comm_commit_pre_hook(MPIR_Comm * comm)
1515
int mpi_errno = MPI_SUCCESS;
1616
MPIR_FUNC_ENTER;
1717

18-
#if defined HAVE_HCOLL
19-
hcoll_comm_create(comm, NULL);
20-
#endif
21-
2218
MPIR_FUNC_EXIT;
2319
return mpi_errno;
2420
}
@@ -29,6 +25,10 @@ int MPIDI_UCX_mpi_comm_commit_post_hook(MPIR_Comm * comm)
2925

3026
MPIR_FUNC_ENTER;
3127

28+
#if defined HAVE_HCOLL
29+
hcoll_comm_create(comm, NULL);
30+
#endif
31+
3232
MPIR_FUNC_EXIT;
3333
return mpi_errno;
3434
}

src/mpid/common/hcoll/hcoll_init.c

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -130,28 +130,28 @@ int hcoll_comm_create(MPIR_Comm * comm_ptr, void *param)
130130
int context_destroyed;
131131
mpi_errno = MPI_SUCCESS;
132132

133-
if (0 == hcoll_initialized) {
134-
mpi_errno = hcoll_initialize();
135-
MPIR_ERR_CHECK(mpi_errno);
136-
}
133+
comm_ptr->hcoll_priv.is_hcoll_init = 0;
137134

138135
if (0 == hcoll_enable) {
139-
comm_ptr->hcoll_priv.is_hcoll_init = 0;
140136
goto fn_exit;
141137
}
142138

143-
if (MPIR_Process.comm_world == comm_ptr) {
144-
hcoll_comm_world_initialized = 1;
145-
}
146-
if (!hcoll_comm_world_initialized) {
147-
comm_ptr->hcoll_priv.is_hcoll_init = 0;
148-
goto fn_exit;
149-
}
150139
num_ranks = comm_ptr->local_size;
151140
if ((MPIR_COMM_KIND__INTRACOMM != comm_ptr->comm_kind) || (2 > num_ranks)
152141
|| comm_ptr->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__NODE_ROOTS
153142
|| comm_ptr->hierarchy_kind == MPIR_COMM_HIERARCHY_KIND__NODE) {
154-
comm_ptr->hcoll_priv.is_hcoll_init = 0;
143+
goto fn_exit;
144+
}
145+
146+
if (0 == hcoll_initialized) {
147+
mpi_errno = hcoll_initialize();
148+
MPIR_ERR_CHECK(mpi_errno);
149+
}
150+
151+
if (MPIR_Process.comm_world == comm_ptr) {
152+
hcoll_comm_world_initialized = 1;
153+
}
154+
if (!hcoll_comm_world_initialized) {
155155
goto fn_exit;
156156
}
157157

@@ -194,8 +194,6 @@ int hcoll_comm_destroy(MPIR_Comm * comm_ptr, void *param)
194194

195195
int hcoll_do_progress(int vci, int *made_progress)
196196
{
197-
*made_progress = 1;
198-
199197
/* hcoll_progress_fn() has been deprecated since v4.0. */
200198
#if HCOLL_API < HCOLL_VERSION(4,0)
201199
MPID_THREAD_CS_ENTER(VCI, MPIDIU_THREAD_HCOLL_MUTEX);

src/mpid/common/hcoll/hcoll_rte.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ static void progress(void)
5555
/* FIXME: The hcoll library needs to be updated to return
5656
* error codes. The progress function pointer right now
5757
* expects that the function returns void. */
58-
ret = hcoll_do_progress(&made_progress);
58+
ret = hcoll_do_progress(-1, &made_progress);
5959
MPIR_Assert(ret == MPI_SUCCESS);
6060
}
6161
}

0 commit comments

Comments
 (0)