Skip to content

Commit 1af9b52

Browse files
committed
coll/csel: load json as named subtrees
Load src/mpi/coll/coll_selection.json as named subtrees. Add MPIR_Coll_run_tree, which runs the selection on a subtree. Replace MPIR_Coll_composition_auto with MPIR_Coll_json, and replace MPIR_Coll_auto with MPIR_Coll_run_tree(csel_tree_auto, coll_sig). csel_tree_auto will fall back to csel_tree_main if it is not defined in the json file. Similarly, we can easily introduce more predefined subtrees later, e.g. bcast-intra-auto etc. In the CVAR selection, "auto" should be the default and its value should be 0. Thus it should automatically fall through and run on csel_tree_main.
1 parent 744f5a9 commit 1af9b52

File tree

8 files changed

+82
-74
lines changed

8 files changed

+82
-74
lines changed

maint/gen_coll.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ def dump_cvar_cases(name, commkind):
366366

367367
dump_open("switch (cvar_val) {")
368368
G.out.append("case MPIR_CVAR_%s_%s_ALGORITHM_auto:" % (name.upper(), commkind.upper()))
369-
G.out.append(" return %s__MPIR_Coll_auto;" % (algo_id_prefix))
369+
G.out.append(" MPIR_Assert(0); /* auto cvar_val should be 0 and shouldn't be called here */")
370+
G.out.append(" return %s;" % algo_id_END())
370371
if not name.startswith("i"): # blocking
371372
G.out.append("case MPIR_CVAR_%s_%s_ALGORITHM_nb:" % (name.upper(), commkind.upper()))
372373
G.out.append(" return %s__MPIR_Coll_nb;" % (algo_id_prefix))
@@ -381,6 +382,7 @@ def dump_cvar_cases(name, commkind):
381382
add_prototype(decl)
382383
G.out.append(decl)
383384
dump_open("{")
385+
G.out.append("MPIR_Assert(cvar_val > 0);")
384386
dump_open("switch (coll_type) {")
385387
for coll in G.coll_names:
386388
for commkind in ("intra", "inter"):
@@ -612,7 +614,7 @@ def dump_coll_impl(name, blocking_type):
612614

613615
# Call csel
614616
G.out.append("")
615-
G.out.append("mpi_errno = MPIR_Coll_composition_auto(&coll_sig);")
617+
G.out.append("mpi_errno = MPIR_Coll_json(&coll_sig);")
616618
G.out.append("MPIR_ERR_CHECK(mpi_errno);")
617619
G.out.append("")
618620

maint/json_gen.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,4 @@ cat > $cfile<<EOF
3333
EOF
3434

3535
# create specific json buffers
36-
create_json_buf src/mpi/coll/coll_composition.json MPII_coll_composition_json
3736
create_json_buf src/mpi/coll/coll_selection.json MPII_coll_selection_json

src/mpi/coll/coll_algorithms.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ conditions:
7070

7171
# ----
7272
general:
73-
MPIR_Coll_auto
7473
MPIR_Coll_nb
7574

7675
# ----

src/mpi/coll/coll_composition.json

Lines changed: 0 additions & 3 deletions
This file was deleted.

src/mpi/coll/include/coll_csel.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,13 @@ extern const char **MPIR_Coll_type_names;
3333
extern const char **MPIR_Coll_algo_names;
3434
extern const char **MPIR_Csel_condition_names;
3535

36-
int MPIR_Csel_create_from_file(const char *json_file, void **csel);
37-
int MPIR_Csel_create_from_buf(const char *json, void **csel);
38-
int MPIR_Csel_free(void *csel);
36+
int MPIR_Csel_load_file(const char *json_file);
37+
int MPIR_Csel_load_buf(const char *json_str);
38+
int MPIR_Csel_free(void);
39+
MPIR_Csel_node_s *MPIR_Csel_get_tree(const char *name);
3940
MPII_Csel_container_s *MPIR_Csel_search(void *csel, MPIR_Csel_coll_sig_s * coll_sig);
4041
void MPIR_Csel_print_tree(MPIR_Csel_node_s * node, int level);
42+
int MPIR_Coll_run_tree(MPIR_Csel_node_s * tree, MPIR_Csel_coll_sig_s * coll_sig);
4143

4244
MPL_STATIC_INLINE_PREFIX int MPIR_Csel_comm_size(MPIR_Csel_coll_sig_s * coll_sig)
4345
{

src/mpi/coll/include/coll_impl.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ extern MPIR_Tree_type_t MPIR_Allreduce_tree_type;
3939
extern MPIR_Tree_type_t MPIR_Ireduce_tree_type;
4040
extern MPIR_Tree_type_t MPIR_Ibcast_tree_type;
4141
extern MPIR_Tree_type_t MPIR_Bcast_tree_type;
42-
extern char MPII_coll_composition_json[];
4342
extern char MPII_coll_selection_json[];
4443

4544
MPIR_Tree_type_t get_tree_type_from_string(const char *tree_str);
@@ -58,10 +57,7 @@ int MPIR_Coll_safe_to_block(void);
5857

5958
int MPII_Coll_finalize(void);
6059

61-
/* NOTE: MPIR_Coll_auto is one of the composition container functions. However,
62-
* MPIR_Coll_composition_auto is a gate function, thus does not take "cnt" parameter. */
63-
int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig);
64-
int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * cnt);
60+
int MPIR_Coll_json(MPIR_Csel_coll_sig_s * coll_sig);
6561

6662
#define MPII_GENTRAN_CREATE_SCHED_P() \
6763
do { \

src/mpi/coll/src/coll_impl.c

Lines changed: 26 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,6 @@ categories :
4040
description : >-
4141
Defines the location of tuning file that selects basic collective algorithms.
4242
43-
- name : MPIR_CVAR_COLL_COMPOSITION_JSON_FILE
44-
category : COLLECTIVE
45-
type : string
46-
default : ""
47-
class : none
48-
verbosity : MPI_T_VERBOSITY_USER_BASIC
49-
scope : MPI_T_SCOPE_ALL_EQ
50-
description : >-
51-
Defines the location of tuning file that selects composition collective algorithms.
52-
5343
- name : MPIR_CVAR_HIERARCHY_DUMP
5444
category : COLLECTIVE
5545
type : boolean
@@ -106,9 +96,8 @@ MPIR_Tree_type_t MPIR_Ireduce_tree_type = MPIR_TREE_TYPE_KARY;
10696
void *MPIR_Csel_root = NULL;
10797
const char *MPIR_Csel_source;
10898

109-
/* TODO: remove the old MPIR_Csel_root etc. */
110-
void *MPIR_Csel_composition = NULL;
111-
void *MPIR_Csel_selection = NULL;
99+
MPIR_Csel_node_s *csel_tree_main;
100+
MPIR_Csel_node_s *csel_tree_auto;
112101

113102
/* table of all collective algorithms */
114103
MPIR_Coll_algo_fn *MPIR_Coll_algo_table;
@@ -167,16 +156,6 @@ int get_ccl_from_string(const char *ccl_str)
167156
return ccl;
168157
}
169158

170-
#define LOAD_CSEL_JSON(csel_var, cvar_name, builtin_str) \
171-
do { \
172-
if (!strcmp(cvar_name, "")) { \
173-
mpi_errno = MPIR_Csel_create_from_buf(builtin_str, &csel_var); \
174-
} else { \
175-
mpi_errno = MPIR_Csel_create_from_file(cvar_name, &csel_var); \
176-
} \
177-
MPIR_ERR_CHECK(mpi_errno); \
178-
} while (0)
179-
180159
int MPII_Coll_init(void)
181160
{
182161
int mpi_errno = MPI_SUCCESS;
@@ -225,10 +204,21 @@ int MPII_Coll_init(void)
225204
MPII_Csel_init_condition_names();
226205

227206
/* initialize selection tree */
228-
LOAD_CSEL_JSON(MPIR_Csel_composition,
229-
MPIR_CVAR_COLL_COMPOSITION_JSON_FILE, MPII_coll_composition_json);
230-
LOAD_CSEL_JSON(MPIR_Csel_selection,
231-
MPIR_CVAR_COLL_SELECTION_JSON_FILE, MPII_coll_selection_json);
207+
mpi_errno = MPIR_Csel_load_buf(MPII_coll_selection_json);
208+
MPIR_ERR_CHECK(mpi_errno);
209+
210+
if (strcmp(MPIR_CVAR_COLL_SELECTION_JSON_FILE, "") != 0) {
211+
mpi_errno = MPIR_Csel_load_file(MPIR_CVAR_COLL_SELECTION_JSON_FILE);
212+
MPIR_ERR_CHECK(mpi_errno);
213+
}
214+
215+
csel_tree_main = MPIR_Csel_get_tree("main");
216+
MPIR_Assert(csel_tree_main);
217+
218+
csel_tree_auto = MPIR_Csel_get_tree("auto");
219+
if (!csel_tree_auto) {
220+
csel_tree_auto = csel_tree_main;
221+
}
232222

233223
fn_exit:
234224
return mpi_errno;
@@ -250,10 +240,7 @@ int MPII_Coll_finalize(void)
250240
mpi_errno = MPII_TSP_finalize();
251241
MPIR_ERR_CHECK(mpi_errno);
252242

253-
mpi_errno = MPIR_Csel_free(MPIR_Csel_composition);
254-
MPIR_ERR_CHECK(mpi_errno);
255-
256-
mpi_errno = MPIR_Csel_free(MPIR_Csel_selection);
243+
mpi_errno = MPIR_Csel_free();
257244
MPIR_ERR_CHECK(mpi_errno);
258245

259246
MPL_free(MPIR_Coll_algo_table);
@@ -262,6 +249,9 @@ int MPII_Coll_finalize(void)
262249
MPL_free(MPIR_Coll_type_names);
263250
MPL_free(MPIR_Csel_condition_names);
264251

252+
csel_tree_main = NULL;
253+
csel_tree_auto = NULL;
254+
265255
fn_exit:
266256
return mpi_errno;
267257
fn_fail:
@@ -408,13 +398,11 @@ void MPIR_Coll_host_buffer_persist_set(void *host_sendbuf, void *host_recvbuf, v
408398
}
409399
}
410400

411-
int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig)
401+
int MPIR_Coll_run_tree(MPIR_Csel_node_s * tree, MPIR_Csel_coll_sig_s * coll_sig)
412402
{
413403
int mpi_errno = MPI_SUCCESS;
414404

415-
/* TODO: need a mechanism in coll_sig so we can assert and prevent a dead recursion loop */
416-
417-
MPII_Csel_container_s *cnt = MPIR_Csel_search(MPIR_Csel_composition, coll_sig);
405+
MPII_Csel_container_s *cnt = MPIR_Csel_search(tree, coll_sig);
418406
MPIR_ERR_CHKANDJUMP(!cnt, mpi_errno, MPI_ERR_OTHER, "**csel_noresult");
419407

420408
mpi_errno = MPIR_Coll_algo_table[cnt->id] (coll_sig, cnt);
@@ -426,7 +414,7 @@ int MPIR_Coll_composition_auto(MPIR_Csel_coll_sig_s * coll_sig)
426414
goto fn_exit;
427415
}
428416

429-
int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me)
417+
int MPIR_Coll_json(MPIR_Csel_coll_sig_s * coll_sig)
430418
{
431419
int mpi_errno = MPI_SUCCESS;
432420

@@ -448,13 +436,7 @@ int MPIR_Coll_auto(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me)
448436
}
449437
}
450438

451-
/* Search an algorithm by Csel */
452-
MPII_Csel_container_s *cnt = MPIR_Csel_search(MPIR_Csel_selection, coll_sig);
453-
MPIR_ERR_CHKANDJUMP(!cnt, mpi_errno, MPI_ERR_OTHER, "**csel_noresult");
454-
455-
/* TODO: assert the selected algorithm is not a composition algorithm */
456-
457-
mpi_errno = MPIR_Coll_algo_table[cnt->id] (coll_sig, cnt);
439+
mpi_errno = MPIR_Coll_run_tree(csel_tree_main, coll_sig);
458440
MPIR_ERR_CHECK(mpi_errno);
459441

460442
fn_exit:
@@ -472,7 +454,7 @@ int MPIR_Coll_nb(MPIR_Csel_coll_sig_s * coll_sig, MPII_Csel_container_s * me)
472454
MPIR_Assert(coll_sig->coll_type % 2 == 0);
473455
coll_sig->coll_type += 1;
474456

475-
mpi_errno = MPIR_Coll_auto(coll_sig, NULL);
457+
mpi_errno = MPIR_Coll_run_tree(csel_tree_auto, coll_sig);
476458
MPIR_ERR_CHECK(mpi_errno);
477459

478460
MPIR_Request *req;

src/mpi/coll/src/csel.c

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ UT_array *csel_subtrees = NULL;
3434
int csel_main_idx = -1;
3535

3636
static int csel_name_to_idx(const char *name);
37+
static MPIR_Csel_node_s *csel_get_tree_by_idx(int idx);
3738
static void add_named_tree(const char *name, struct json_object *obj);
3839
static void replace_named_tree(int idx, struct json_object *obj);
3940
static int parse_named_trees(void);
@@ -142,12 +143,23 @@ static MPIR_Csel_node_s *parse_json_tree(struct json_object *obj)
142143
/* parse the json_object assuming it is a list of "name=xxx": subtree.
143144
* Otherwise, parse it as a single tree and set the name to "main".
144145
*/
146+
static void csel_subtree_dtor(void *item)
147+
{
148+
struct csel_subtree *p = item;
149+
MPIR_Assert(p->json_obj == NULL);
150+
if (p->node) {
151+
free_tree(p->node);
152+
p->node = NULL;
153+
}
154+
}
155+
145156
static int parse_json_names(struct json_object *obj)
146157
{
147158
int mpi_errno = MPI_SUCCESS;
148159

149160
if (!csel_subtrees) {
150-
static UT_icd csel_subtree_icd = { sizeof(struct csel_subtree), NULL, NULL, NULL };
161+
static UT_icd csel_subtree_icd =
162+
{ sizeof(struct csel_subtree), NULL, NULL, csel_subtree_dtor };
151163
utarray_new(csel_subtrees, &csel_subtree_icd, MPL_MEM_COLL);
152164
}
153165

@@ -185,13 +197,12 @@ static int parse_json_names(struct json_object *obj)
185197
return mpi_errno;
186198
}
187199

188-
int MPIR_Csel_create_from_buf(const char *json, void **csel_)
200+
int MPIR_Csel_load_buf(const char *json_str)
189201
{
190202
int mpi_errno = MPI_SUCCESS;
191-
MPIR_Csel_node_s *csel_root = NULL;
192203

193204
struct json_object *tree;
194-
tree = json_tokener_parse(json);
205+
tree = json_tokener_parse(json_str);
195206
if (tree == NULL)
196207
goto fn_exit;
197208

@@ -200,15 +211,10 @@ int MPIR_Csel_create_from_buf(const char *json, void **csel_)
200211
json_object_put(tree);
201212

202213
fn_exit:
203-
if (0 && MPIR_Process.rank == 0) {
204-
printf("====\n");
205-
MPIR_Csel_print_tree(csel_root, 0);
206-
}
207-
*csel_ = csel_root;
208214
return mpi_errno;
209215
}
210216

211-
int MPIR_Csel_create_from_file(const char *json_file, void **csel_)
217+
int MPIR_Csel_load_file(const char *json_file)
212218
{
213219
int mpi_errno = MPI_SUCCESS;
214220

@@ -223,19 +229,19 @@ int MPIR_Csel_create_from_file(const char *json_file, void **csel_)
223229
char *json = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
224230
close(fd);
225231

226-
mpi_errno = MPIR_Csel_create_from_buf(json, csel_);
232+
mpi_errno = MPIR_Csel_load_buf(json);
227233

228234
fn_fail:
229235
return mpi_errno;
230236
}
231237

232-
int MPIR_Csel_free(void *csel_root)
238+
int MPIR_Csel_free(void)
233239
{
234240
int mpi_errno = MPI_SUCCESS;
235241

236-
if (csel_root) {
237-
free_tree(csel_root);
238-
}
242+
utarray_clear(csel_subtrees);
243+
utarray_free(csel_subtrees);
244+
csel_subtrees = NULL;
239245

240246
return mpi_errno;
241247
}
@@ -246,6 +252,13 @@ MPII_Csel_container_s *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s * coll
246252
MPIR_Csel_node_s *node = csel_;
247253
while (node) {
248254
switch (node->type) {
255+
case CSEL_NODE_TYPE__OPERATOR__CALL:
256+
MPIR_Csel_node_s * tree = csel_get_tree_by_idx(node->u.call.idx);
257+
if (!tree) {
258+
goto fn_fail;
259+
}
260+
return MPIR_Csel_search(tree, coll_sig);
261+
249262
case CSEL_NODE_TYPE__OPERATOR__COLLECTIVE:
250263
if (node->u.collective.coll_type == coll_sig->coll_type)
251264
node = node->success;
@@ -267,10 +280,20 @@ MPII_Csel_container_s *MPIR_Csel_search(void *csel_, MPIR_Csel_coll_sig_s * coll
267280
}
268281
}
269282

283+
fn_fail:
270284
MPIR_Assert(0 && "MPIR_Csel_search failed to find an algorithm");
271285
return NULL;
272286
}
273287

288+
MPIR_Csel_node_s *MPIR_Csel_get_tree(const char *name)
289+
{
290+
int idx = csel_name_to_idx(name);
291+
if (idx >= 0) {
292+
return csel_get_tree_by_idx(idx);
293+
}
294+
return NULL;
295+
}
296+
274297
/* -- internal static routines -- */
275298

276299
static void free_tree(MPIR_Csel_node_s * node)
@@ -335,6 +358,14 @@ static int csel_name_to_idx(const char *name)
335358
}
336359
}
337360

361+
static MPIR_Csel_node_s *csel_get_tree_by_idx(int idx)
362+
{
363+
struct csel_subtree *p = (void *) utarray_eltptr(csel_subtrees, idx);
364+
MPIR_Assert(p);
365+
MPIR_Assert(p->node);
366+
return p->node;
367+
}
368+
338369
static void add_named_tree(const char *name, struct json_object *obj)
339370
{
340371
struct csel_subtree item = { NULL, obj };

0 commit comments

Comments
 (0)