Skip to content

Commit c5e6c36

Browse files
committed
all: use fallback routines for internal collectives
The fallback collectives (e.g. MPIR_Bcast_fallback) are manual "auto" functions that may not be the best algorithms for the system, but are sufficient for internal usage during init and object construction. This prepares for the revamp of CSEL.
1 parent 01b7cd9 commit c5e6c36

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+267
-257
lines changed

src/binding/c/comm_api.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,8 @@ MPI_Intercomm_merge:
300300
* The Intel test suite checks for this; it is also an easy
301301
* error to make */
302302
acthigh = high ? 1 : 0; /* Clamp high into 1 or 0 */
303-
mpi_errno = MPIR_Allreduce(MPI_IN_PLACE, &acthigh, 1, MPIR_INT_INTERNAL,
304-
MPI_SUM, intercomm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
303+
mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, &acthigh, 1, MPIR_INT_INTERNAL,
304+
MPI_SUM, intercomm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
305305
MPIR_ERR_CHECK(mpi_errno);
306306
/* acthigh must either == 0 or the size of the local comm */
307307
if (acthigh != 0 && acthigh != intercomm_ptr->local_size) {

src/include/mpir_coll.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#define MPIR_Allreduce_fallback MPIR_Allreduce_intra_recursive_doubling
2020
#define MPIR_Bcast_fallback MPIR_Bcast_intra_binomial
2121
#define MPIR_Gather_fallback MPIR_Gather_intra_binomial
22+
#define MPIR_Reduce_scatter_block_fallback MPIR_Reduce_scatter_block_intra_recursive_doubling
2223

2324

2425
/* Internal point-to-point communication for collectives */

src/mpi/coll/algorithms/treealgo/treeutil.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -758,9 +758,9 @@ int MPII_Treeutil_tree_topology_aware_k_init(MPIR_Comm * comm, int k, int root,
758758
} else {
759759
/* rank level - build a tree on the ranks */
760760
/* Do an allgather to know the current num_children on each rank */
761-
mpi_errno = MPIR_Allgather_impl(&(ct->num_children), 1, MPIR_INT_INTERNAL,
762-
num_childrens, 1, MPIR_INT_INTERNAL, comm,
763-
MPIR_COLL_ATTR_SYNC);
761+
mpi_errno = MPIR_Allgather_fallback(&(ct->num_children), 1, MPIR_INT_INTERNAL,
762+
num_childrens, 1, MPIR_INT_INTERNAL, comm,
763+
MPIR_COLL_ATTR_SYNC);
764764
MPIR_ERR_CHECK(mpi_errno);
765765

766766
int switch_leader = tree_ut_int_elt(&level->ranks, level->root_idx);

src/mpi/coll/allgather/allgather_intra_smp.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ int MPIR_Allgather_intra_smp_no_order(const void *sendbuf, MPI_Aint sendcount,
2929
int external_rank = comm_ptr->external_rank;
3030

3131
if (local_size == comm_size || external_size == comm_size) {
32-
mpi_errno = MPIR_Allgather_impl(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
33-
comm_ptr, coll_attr);
32+
mpi_errno =
33+
MPIR_Allgather_fallback(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
34+
comm_ptr, coll_attr);
3435
goto fn_exit;
3536
}
3637

@@ -80,20 +81,20 @@ int MPIR_Allgather_intra_smp_no_order(const void *sendbuf, MPI_Aint sendcount,
8081
MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent);
8182
local_recvbuf = (char *) recvbuf + displs[external_rank] * recvtype_extent;
8283
}
83-
mpi_errno = MPIR_Gather_impl(sendbuf, sendcount, sendtype,
84-
local_recvbuf, recvcount, recvtype, 0, node_comm, coll_attr);
84+
mpi_errno = MPIR_Gather_fallback(sendbuf, sendcount, sendtype,
85+
local_recvbuf, recvcount, recvtype, 0, node_comm, coll_attr);
8586
MPIR_ERR_CHECK(mpi_errno);
8687

8788
/* -- allgatherv over node roots -- */
8889
if (local_rank == 0) {
89-
mpi_errno = MPIR_Allgatherv_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
90-
recvbuf, counts, displs, recvtype,
91-
node_roots_comm, coll_attr);
90+
mpi_errno = MPIR_Allgatherv_fallback(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
91+
recvbuf, counts, displs, recvtype,
92+
node_roots_comm, coll_attr);
9293
MPIR_ERR_CHECK(mpi_errno);
9394
}
9495

9596
/* -- bcast over node -- */
96-
mpi_errno = MPIR_Bcast_impl(recvbuf, total_count, recvtype, 0, node_comm, coll_attr);
97+
mpi_errno = MPIR_Bcast_fallback(recvbuf, total_count, recvtype, 0, node_comm, coll_attr);
9798
MPIR_ERR_CHECK(mpi_errno);
9899

99100
fn_exit:

src/mpi/coll/allgatherv/allgatherv_inter_remote_gather_local_bcast.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ int MPIR_Allgatherv_inter_remote_gather_local_bcast(const void *sendbuf, MPI_Ain
7171
mpi_errno = MPIR_Type_commit_impl(&newtype);
7272
MPIR_ERR_CHECK(mpi_errno);
7373

74-
mpi_errno = MPIR_Bcast_allcomm_auto(recvbuf, 1, newtype, 0, newcomm_ptr, coll_attr);
74+
mpi_errno = MPIR_Bcast_fallback(recvbuf, 1, newtype, 0, newcomm_ptr, coll_attr);
7575
MPIR_ERR_CHECK(mpi_errno);
7676

7777
MPIR_Type_free_impl(&newtype);

src/mpi/comm/comm_impl.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -310,24 +310,24 @@ int MPIR_Comm_create_inter(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, MPIR_Co
310310
MPIR_ERR_CHECK(mpi_errno);
311311

312312
/* Broadcast to the other members of the local group */
313-
mpi_errno = MPIR_Bcast(rinfo, 2, MPIR_INT_INTERNAL, 0, comm_ptr->local_comm,
314-
MPIR_COLL_ATTR_SYNC);
313+
mpi_errno = MPIR_Bcast_fallback(rinfo, 2, MPIR_INT_INTERNAL, 0, comm_ptr->local_comm,
314+
MPIR_COLL_ATTR_SYNC);
315315
MPIR_ERR_CHECK(mpi_errno);
316-
mpi_errno = MPIR_Bcast(remote_mapping, remote_size, MPIR_INT_INTERNAL, 0,
317-
comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
316+
mpi_errno = MPIR_Bcast_fallback(remote_mapping, remote_size, MPIR_INT_INTERNAL, 0,
317+
comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
318318
MPIR_ERR_CHECK(mpi_errno);
319319
} else {
320320
/* The other processes */
321321
/* Broadcast to the other members of the local group */
322-
mpi_errno = MPIR_Bcast(rinfo, 2, MPIR_INT_INTERNAL, 0, comm_ptr->local_comm,
323-
MPIR_COLL_ATTR_SYNC);
322+
mpi_errno = MPIR_Bcast_fallback(rinfo, 2, MPIR_INT_INTERNAL, 0, comm_ptr->local_comm,
323+
MPIR_COLL_ATTR_SYNC);
324324
MPIR_ERR_CHECK(mpi_errno);
325325

326326
context_id = rinfo[0];
327327
remote_size = rinfo[1];
328328
MPIR_CHKLMEM_MALLOC(remote_mapping, remote_size * sizeof(int));
329-
mpi_errno = MPIR_Bcast(remote_mapping, remote_size, MPIR_INT_INTERNAL, 0,
330-
comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
329+
mpi_errno = MPIR_Bcast_fallback(remote_mapping, remote_size, MPIR_INT_INTERNAL, 0,
330+
comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
331331
MPIR_ERR_CHECK(mpi_errno);
332332
}
333333

@@ -690,8 +690,8 @@ int MPIR_Intercomm_create_from_groups_impl(MPIR_Group * local_group_ptr, int loc
690690

691691
/* synchronize mpi_errno */
692692
int tmp_err = mpi_errno;
693-
mpi_errno = MPIR_Bcast_impl(&tmp_err, 1, MPIR_INT_INTERNAL, local_leader, local_comm,
694-
MPIR_COLL_ATTR_SYNC);
693+
mpi_errno = MPIR_Bcast_fallback(&tmp_err, 1, MPIR_INT_INTERNAL, local_leader, local_comm,
694+
MPIR_COLL_ATTR_SYNC);
695695
MPIR_ERR_CHECK(mpi_errno);
696696
mpi_errno = tmp_err;
697697
MPIR_ERR_CHECK(mpi_errno);
@@ -1050,8 +1050,8 @@ int MPIR_Intercomm_merge_impl(MPIR_Comm * comm_ptr, int high, MPIR_Comm ** new_i
10501050
* value of local_high, which may have changed if both groups
10511051
* of processes had the same value for high
10521052
*/
1053-
mpi_errno = MPIR_Bcast(&local_high, 1, MPIR_INT_INTERNAL, 0, comm_ptr->local_comm,
1054-
MPIR_COLL_ATTR_SYNC);
1053+
mpi_errno = MPIR_Bcast_fallback(&local_high, 1, MPIR_INT_INTERNAL, 0, comm_ptr->local_comm,
1054+
MPIR_COLL_ATTR_SYNC);
10551055
MPIR_ERR_CHECK(mpi_errno);
10561056

10571057
/*

src/mpi/comm/comm_split.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,9 @@ int MPIR_Comm_split_impl(MPIR_Comm * comm_ptr, int color, int key, MPIR_Comm **
111111
local_comm_ptr = comm_ptr;
112112
}
113113
/* Gather information on the local group of processes */
114-
mpi_errno = MPIR_Allgather(MPI_IN_PLACE, 2, MPIR_INT_INTERNAL, table, 2, MPIR_INT_INTERNAL,
115-
local_comm_ptr, MPIR_COLL_ATTR_SYNC);
114+
mpi_errno =
115+
MPIR_Allgather_fallback(MPI_IN_PLACE, 2, MPIR_INT_INTERNAL, table, 2, MPIR_INT_INTERNAL,
116+
local_comm_ptr, MPIR_COLL_ATTR_SYNC);
116117
MPIR_ERR_CHECK(mpi_errno);
117118

118119
/* Step 2: How many processes have our same color? */
@@ -156,8 +157,8 @@ int MPIR_Comm_split_impl(MPIR_Comm * comm_ptr, int color, int key, MPIR_Comm **
156157
* same color */
157158
mypair.color = color;
158159
mypair.key = key;
159-
mpi_errno = MPIR_Allgather(&mypair, 2, MPIR_INT_INTERNAL, remotetable, 2, MPIR_INT_INTERNAL,
160-
comm_ptr, MPIR_COLL_ATTR_SYNC);
160+
mpi_errno = MPIR_Allgather_fallback(&mypair, 2, MPIR_INT_INTERNAL, remotetable, 2,
161+
MPIR_INT_INTERNAL, comm_ptr, MPIR_COLL_ATTR_SYNC);
161162
MPIR_ERR_CHECK(mpi_errno);
162163

163164
/* Each process can now match its color with the entries in the table */
@@ -214,19 +215,17 @@ int MPIR_Comm_split_impl(MPIR_Comm * comm_ptr, int color, int key, MPIR_Comm **
214215
&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
215216
0, 0, comm_ptr, MPI_STATUS_IGNORE, MPIR_COLL_ATTR_SYNC);
216217
MPIR_ERR_CHECK(mpi_errno);
217-
mpi_errno =
218-
MPIR_Bcast(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr,
219-
MPIR_COLL_ATTR_SYNC);
218+
mpi_errno = MPIR_Bcast_fallback(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0,
219+
local_comm_ptr, MPIR_COLL_ATTR_SYNC);
220220
MPIR_ERR_CHECK(mpi_errno);
221221

222222
if (!in_newcomm) {
223223
MPIR_Free_contextid(new_context_id);
224224
}
225225
} else {
226226
/* Broadcast to the other members of the local group */
227-
mpi_errno =
228-
MPIR_Bcast(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0, local_comm_ptr,
229-
MPIR_COLL_ATTR_SYNC);
227+
mpi_errno = MPIR_Bcast_fallback(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE, 0,
228+
local_comm_ptr, MPIR_COLL_ATTR_SYNC);
230229
MPIR_ERR_CHECK(mpi_errno);
231230
}
232231
}

src/mpi/comm/comm_split_type_nbhd.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -275,8 +275,9 @@ static int network_split_by_minsize(MPIR_Comm * comm_ptr, int key, int subcomm_m
275275
}
276276
MPIR_Assert(num_processes_at_node != NULL);
277277
/* Send the count to processes */
278-
mpi_errno = MPIR_Allreduce(MPI_IN_PLACE, num_processes_at_node, num_nodes,
279-
MPIR_INT_INTERNAL, MPI_SUM, comm_ptr, MPIR_COLL_ATTR_SYNC);
278+
mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, num_processes_at_node, num_nodes,
279+
MPIR_INT_INTERNAL, MPI_SUM, comm_ptr,
280+
MPIR_COLL_ATTR_SYNC);
280281

281282
if (topo_type == MPIR_NETTOPO_TYPE__FAT_TREE ||
282283
topo_type == MPIR_NETTOPO_TYPE__CLOS_NETWORK) {
@@ -375,8 +376,8 @@ static int network_split_by_minsize(MPIR_Comm * comm_ptr, int key, int subcomm_m
375376
tree_depth = MPIR_hwtopo_get_depth(obj_containing_cpuset);
376377

377378
/* get min tree depth to all processes */
378-
MPIR_Allreduce(&tree_depth, &min_tree_depth, 1, MPIR_INT_INTERNAL, MPI_MIN, node_comm,
379-
MPIR_COLL_ATTR_SYNC);
379+
MPIR_Allreduce_fallback(&tree_depth, &min_tree_depth, 1, MPIR_INT_INTERNAL, MPI_MIN,
380+
node_comm, MPIR_COLL_ATTR_SYNC);
380381

381382
if (min_tree_depth) {
382383
int num_hwloc_objs_at_depth;
@@ -389,8 +390,8 @@ static int network_split_by_minsize(MPIR_Comm * comm_ptr, int key, int subcomm_m
389390
parent_idx[subcomm_rank] = obj_containing_cpuset;
390391

391392
/* get parent_idx to all processes */
392-
MPIR_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, parent_idx, 1, MPIR_INT_INTERNAL,
393-
node_comm, MPIR_COLL_ATTR_SYNC);
393+
MPIR_Allgather_fallback(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, parent_idx, 1,
394+
MPIR_INT_INTERNAL, node_comm, MPIR_COLL_ATTR_SYNC);
394395

395396
/* reorder parent indices */
396397
for (i = 0; i < num_procs - 1; i++) {

src/mpi/comm/commutil.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -780,8 +780,7 @@ static int init_comm_seq(MPIR_Comm * comm)
780780
/* Every rank need share the same seq from root. NOTE: it is possible for
781781
* different communicators to have the same seq. It is only used as an
782782
* opportunistic optimization */
783-
mpi_errno = MPIR_Bcast_allcomm_auto(&tmp, 1, MPIR_INT_INTERNAL, 0, comm,
784-
MPIR_COLL_ATTR_SYNC);
783+
mpi_errno = MPIR_Bcast_fallback(&tmp, 1, MPIR_INT_INTERNAL, 0, comm, MPIR_COLL_ATTR_SYNC);
785784
MPIR_ERR_CHECK(mpi_errno);
786785

787786
comm->seq = tmp;

src/mpi/comm/contextid.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -455,9 +455,9 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr
455455
MPIR_INT_INTERNAL, MPI_BAND, comm_ptr, group_ptr,
456456
coll_tag, MPIR_COLL_ATTR_SYNC);
457457
} else {
458-
mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
459-
MPIR_INT_INTERNAL, MPI_BAND, comm_ptr,
460-
MPIR_COLL_ATTR_SYNC);
458+
mpi_errno =
459+
MPIR_Allreduce_fallback(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
460+
MPIR_INT_INTERNAL, MPI_BAND, comm_ptr, MPIR_COLL_ATTR_SYNC);
461461
}
462462
MPIR_ERR_CHECK(mpi_errno);
463463

@@ -555,8 +555,8 @@ int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr
555555
MPI_MIN, comm_ptr, group_ptr, coll_tag,
556556
MPIR_COLL_ATTR_SYNC);
557557
} else {
558-
mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, &minfree, 1, MPIR_INT_INTERNAL,
559-
MPI_MIN, comm_ptr, MPIR_COLL_ATTR_SYNC);
558+
mpi_errno = MPIR_Allreduce_fallback(MPI_IN_PLACE, &minfree, 1, MPIR_INT_INTERNAL,
559+
MPI_MIN, comm_ptr, MPIR_COLL_ATTR_SYNC);
560560
}
561561

562562
if (minfree > 0) {
@@ -1053,8 +1053,8 @@ int MPIR_Get_intercomm_contextid(MPIR_Comm * comm_ptr, int *context_id, int *rec
10531053

10541054
/* Make sure that all of the local processes now have this
10551055
* id */
1056-
mpi_errno = MPIR_Bcast_impl(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
1057-
0, comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
1056+
mpi_errno = MPIR_Bcast_fallback(&remote_context_id, 1, MPIR_CONTEXT_ID_T_DATATYPE,
1057+
0, comm_ptr->local_comm, MPIR_COLL_ATTR_SYNC);
10581058
MPIR_ERR_CHECK(mpi_errno);
10591059
/* The recvcontext_id must be the one that was allocated out of the local
10601060
* group, not the remote group. Otherwise we could end up posting two

0 commit comments

Comments
 (0)