Skip to content

Commit 5f1827a

Browse files
committed
ch4/wait: Optimize wait when requests are local
Optimize progress for a batch of local communication requests by skipping netmod progress. This change shows a 5-10% improvement in ch4/shm bandwidth measurements on a single node of Cascade Lake.
1 parent 85d781f commit 5f1827a

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

src/mpid/ch4/src/ch4_wait.h

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,18 +11,28 @@
1111
MPL_STATIC_INLINE_PREFIX void MPIDI_set_progress_vci(MPIR_Request * req,
1212
MPID_Progress_state * state)
1313
{
14-
state->flag = MPIDI_PROGRESS_ALL; /* TODO: check request is_local/anysource */
14+
state->flag = MPIDI_PROGRESS_ALL;
1515

1616
int vci = MPIDI_Request_get_vci(req);
1717

1818
state->vci_count = 1;
1919
state->vci[0] = vci;
20+
21+
#ifndef MPIDI_CH4_DIRECT_NETMOD
22+
if (!req->dev.anysrc_partner && MPIDI_REQUEST(req, is_local)) {
23+
state->flag &= ~MPIDI_PROGRESS_NM;
24+
}
25+
#endif
2026
}
2127

2228
MPL_STATIC_INLINE_PREFIX void MPIDI_set_progress_vci_n(int n, MPIR_Request ** reqs,
2329
MPID_Progress_state * state)
2430
{
25-
state->flag = MPIDI_PROGRESS_ALL; /* TODO: check request is_local/anysource */
31+
state->flag = MPIDI_PROGRESS_ALL;
32+
bool nm_progress = false;
33+
#ifdef MPIDI_CH4_DIRECT_NETMOD
34+
nm_progress = true;
35+
#endif
2636

2737
int idx = 0;
2838
for (int i = 0; i < n; i++) {
@@ -45,8 +55,16 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_set_progress_vci_n(int n, MPIR_Request ** re
4555
if (!found) {
4656
state->vci[idx++] = vci;
4757
}
58+
#ifndef MPIDI_CH4_DIRECT_NETMOD
59+
nm_progress |= !MPIDI_REQUEST(reqs[i], is_local);
60+
nm_progress |= reqs[i]->dev.anysrc_partner != NULL;
61+
#endif
4862
}
4963
state->vci_count = idx;
64+
65+
if (!nm_progress) {
66+
state->flag &= ~MPIDI_PROGRESS_NM;
67+
}
5068
}
5169

5270
/* MPID_Test, MPID_Testall, MPID_Testany, MPID_Testsome */

0 commit comments

Comments
 (0)