Skip to content

Commit cf17d05

Browse files
committed
UCT/RC/TEST: More fixes for FC disabled mode and added unit tests
- Extract FC window check to common function - Fix checking FC window during flush - Added unit tests for FC disabled and setting the FC window to 0
1 parent a339438 commit cf17d05

File tree

5 files changed

+111
-22
lines changed

5 files changed

+111
-22
lines changed

src/uct/ib/dc/base/dc_ep.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ static inline int uct_dc_iface_dci_ep_can_send(uct_dc_ep_t *ep)
167167
{
168168
uct_dc_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_iface_t);
169169
return (!(ep->flags & UCT_DC_EP_FLAG_TX_WAIT)) &&
170-
(ep->fc.fc_wnd > 0) &&
170+
uct_rc_fc_has_resources(&iface->super, &ep->fc) &&
171171
uct_dc_iface_dci_has_tx_resources(iface, ep->dci);
172172
}
173173

@@ -176,7 +176,7 @@ void uct_dc_iface_schedule_dci_alloc(uct_dc_iface_t *iface, uct_dc_ep_t *ep)
176176
{
177177
/* If FC window is empty the group will be scheduled when
178178
* grant is received */
179-
if ((ep->fc.fc_wnd > 0) || !iface->super.config.fc_enabled) {
179+
if (uct_rc_fc_has_resources(&iface->super, &ep->fc)) {
180180
ucs_arbiter_group_schedule(uct_dc_iface_dci_waitq(iface), &ep->arb_group);
181181
}
182182
}

src/uct/ib/rc/base/rc_ep.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ enum {
120120

121121
#define UCT_RC_UPDATE_FC_WND(_iface, _fc) \
122122
{ \
123+
/* For performance reasons, prefer to update fc_wnd unconditionally */ \
123124
(_fc)->fc_wnd--; \
124125
\
125126
if ((_iface)->config.fc_enabled) { \
@@ -304,12 +305,19 @@ static UCS_F_ALWAYS_INLINE void uct_rc_txqp_check(uct_rc_txqp_t *txqp)
304305
txqp->qp->qp_num, txqp->qp->state);
305306
}
306307

308+
static UCS_F_ALWAYS_INLINE
309+
int uct_rc_fc_has_resources(uct_rc_iface_t *iface, uct_rc_fc_t *fc)
310+
{
311+
/* When FC is disabled, fc_wnd may still become 0 because it's decremented
312+
* unconditionally (for performance reasons) */
313+
return (fc->fc_wnd > 0) || !iface->config.fc_enabled;
314+
}
315+
307316
static UCS_F_ALWAYS_INLINE int uct_rc_ep_has_tx_resources(uct_rc_ep_t *ep)
308317
{
309318
uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_rc_iface_t);
310319

311-
return (ep->txqp.available > 0) &&
312-
((ep->fc.fc_wnd > 0) || !iface->config.fc_enabled);
320+
return (ep->txqp.available > 0) && uct_rc_fc_has_resources(iface, &ep->fc);
313321
}
314322

315323
static UCS_F_ALWAYS_INLINE void

test/gtest/uct/ib/test_dc.cc

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,8 +371,8 @@ class test_dc_flow_control : public test_rc_flow_control {
371371
public:
372372

373373
/* virtual */
374-
uct_rc_fc_t* get_fc_ptr(entity *e) {
375-
return &ucs_derived_of(e->ep(0), uct_dc_ep_t)->fc;
374+
uct_rc_fc_t* get_fc_ptr(entity *e, int ep_idx = 0) {
375+
return &ucs_derived_of(e->ep(ep_idx), uct_dc_ep_t)->fc;
376376
}
377377
};
378378

@@ -395,6 +395,41 @@ UCS_TEST_P(test_dc_flow_control, pending_grant)
395395
flush();
396396
}
397397

398+
UCS_TEST_P(test_dc_flow_control, fc_disabled_flush)
399+
{
400+
test_flush_fc_disabled();
401+
}
402+
403+
UCS_TEST_P(test_dc_flow_control, fc_disabled_pending_no_dci) {
404+
405+
pending_send_request_t pending_req;
406+
pending_req.uct.func = pending_cb;
407+
pending_req.cb_count = 0;
408+
409+
set_fc_disabled(m_e1);
410+
411+
/* Send on new endpoints until out of DCIs */
412+
for (int ep_index = 0; ep_index < 20; ++ep_index) {
413+
m_e1->connect(ep_index, *m_e2, ep_index);
414+
415+
ucs_status_t status = uct_ep_am_short(m_e1->ep(ep_index), 0, 0, NULL, 0);
416+
if (status == UCS_ERR_NO_RESOURCE) {
417+
/* if FC is disabled, it should be OK to set fc_wnd to 0 */
418+
get_fc_ptr(m_e1, ep_index)->fc_wnd = 0;
419+
420+
/* Add to pending */
421+
status = uct_ep_pending_add(m_e1->ep(ep_index), &pending_req.uct);
422+
ASSERT_UCS_OK(status);
423+
424+
wait_for_flag(&pending_req.cb_count);
425+
EXPECT_EQ(1, pending_req.cb_count);
426+
break;
427+
}
428+
429+
ASSERT_UCS_OK(status);
430+
}
431+
}
432+
398433
/* Check that soft request is not handled by DC */
399434
UCS_TEST_P(test_dc_flow_control, soft_request)
400435
{
@@ -487,8 +522,8 @@ class test_dc_flow_control_stats : public test_rc_flow_control_stats {
487522
test_rc_flow_control_stats::init();
488523
}
489524

490-
uct_rc_fc_t* get_fc_ptr(entity *e) {
491-
return &ucs_derived_of(e->ep(0), uct_dc_ep_t)->fc;
525+
uct_rc_fc_t* get_fc_ptr(entity *e, int ep_idx = 0) {
526+
return &ucs_derived_of(e->ep(ep_idx), uct_dc_ep_t)->fc;
492527
}
493528

494529
uct_rc_fc_t* fake_ep_fc_ptr(entity *e) {

test/gtest/uct/ib/test_rc.cc

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ UCS_TEST_P(test_rc_max_wr, send_limit)
6767
UCT_RC_INSTANTIATE_TEST_CASE(test_rc_max_wr)
6868

6969

70-
int test_rc_flow_control::m_req_count = 0;
7170
uint32_t test_rc_flow_control::m_am_rx_count = 0;
7271

7372
void test_rc_flow_control::init()
@@ -147,24 +146,48 @@ void test_rc_flow_control::test_pending_grant(int wnd)
147146
send_am_messages(m_e1, 1, UCS_OK);
148147
}
149148

149+
void test_rc_flow_control::test_flush_fc_disabled()
150+
{
151+
set_fc_disabled(m_e1);
152+
ucs_status_t status;
153+
154+
/* If FC is disabled, wnd=0 should not prevent the flush */
155+
get_fc_ptr(m_e1)->fc_wnd = 0;
156+
status = uct_ep_flush(m_e1->ep(0), 0, NULL);
157+
EXPECT_EQ(UCS_OK, status);
158+
159+
/* send active message should be OK */
160+
get_fc_ptr(m_e1)->fc_wnd = 1;
161+
status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0);
162+
EXPECT_EQ(UCS_OK, status);
163+
EXPECT_EQ(0, get_fc_ptr(m_e1)->fc_wnd);
164+
165+
/* flush must have resources */
166+
status = uct_ep_flush(m_e1->ep(0), 0, NULL);
167+
EXPECT_FALSE(UCS_STATUS_IS_ERR(status)) << ucs_status_string(status);
168+
}
169+
150170
void test_rc_flow_control::test_pending_purge(int wnd, int num_pend_sends)
151171
{
152-
uct_pending_req_t reqs[num_pend_sends];
172+
pending_send_request_t reqs[num_pend_sends];
153173

154174
disable_entity(m_e2);
155175
set_fc_attributes(m_e1, true, wnd, wnd, 1);
156176

157-
m_req_count = 0;
158177
send_am_and_flush(m_e1, wnd);
159178

160179
/* Now m2 ep should have FC grant message in the pending queue.
161180
* Add some user pending requests as well */
162-
for (int i = 0; i < num_pend_sends; i ++) {
163-
reqs[i].func = NULL; /* make valgrind happy */
164-
EXPECT_EQ(uct_ep_pending_add(m_e2->ep(0), &reqs[i]), UCS_OK);
181+
for (int i = 0; i < num_pend_sends; i++) {
182+
reqs[i].uct.func = NULL; /* make valgrind happy */
183+
reqs[i].purge_count = 0;
184+
EXPECT_EQ(uct_ep_pending_add(m_e2->ep(0), &reqs[i].uct), UCS_OK);
165185
}
166186
uct_ep_pending_purge(m_e2->ep(0), purge_cb, NULL);
167-
EXPECT_EQ(num_pend_sends, m_req_count);
187+
188+
for (int i = 0; i < num_pend_sends; i++) {
189+
EXPECT_EQ(1, reqs[i].purge_count);
190+
}
168191
}
169192

170193

@@ -206,6 +229,11 @@ UCS_TEST_P(test_rc_flow_control, pending_grant)
206229
test_pending_grant(5);
207230
}
208231

232+
UCS_TEST_P(test_rc_flow_control, fc_disabled_flush)
233+
{
234+
test_flush_fc_disabled();
235+
}
236+
209237
UCT_RC_INSTANTIATE_TEST_CASE(test_rc_flow_control)
210238

211239

test/gtest/uct/ib/test_rc.h

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ class test_rc : public uct_test {
2626
return ucs_derived_of(e->iface(), uct_rc_iface_t);
2727
}
2828

29-
uct_rc_ep_t* rc_ep(entity *e) {
30-
return ucs_derived_of(e->ep(0), uct_rc_ep_t);
29+
uct_rc_ep_t* rc_ep(entity *e, int ep_idx = 0) {
30+
return ucs_derived_of(e->ep(ep_idx), uct_rc_ep_t);
3131
}
3232

3333
void send_am_messages(entity *e, int wnd, ucs_status_t expected,
@@ -54,14 +54,15 @@ class test_rc : public uct_test {
5454
class test_rc_flow_control : public test_rc {
5555
public:
5656
typedef struct pending_send_request {
57-
uct_ep_h ep;
5857
uct_pending_req_t uct;
58+
int cb_count;
59+
int purge_count;
5960
} pending_send_request_t;
6061

6162
void init();
6263

63-
virtual uct_rc_fc_t* get_fc_ptr(entity *e) {
64-
return &rc_ep(e)->fc;
64+
virtual uct_rc_fc_t* get_fc_ptr(entity *e, int ep_idx = 0) {
65+
return &rc_ep(e, ep_idx)->fc;
6566
}
6667

6768
virtual void disable_entity(entity *e) {
@@ -84,6 +85,11 @@ class test_rc_flow_control : public test_rc {
8485

8586
}
8687

88+
void set_fc_disabled(entity *e) {
89+
/* same as default settings in rc_iface_init */
90+
set_fc_attributes(m_e1, false, std::numeric_limits<int16_t>::max(), 0, 0);
91+
}
92+
8793
void send_am_and_flush(entity *e, int num_msg);
8894

8995
void progress_loop(double delta_ms=10.0) {
@@ -102,7 +108,18 @@ class test_rc_flow_control : public test_rc {
102108
}
103109

104110
static void purge_cb(uct_pending_req_t *self, void *arg) {
105-
m_req_count++;
111+
pending_send_request_t *req = ucs_container_of(self,
112+
pending_send_request_t,
113+
uct);
114+
++req->purge_count;
115+
}
116+
117+
static ucs_status_t pending_cb(uct_pending_req_t *self) {
118+
pending_send_request_t *req = ucs_container_of(self,
119+
pending_send_request_t,
120+
uct);
121+
++req->cb_count;
122+
return UCS_OK;
106123
}
107124

108125
void validate_grant(entity *e);
@@ -113,8 +130,9 @@ class test_rc_flow_control : public test_rc {
113130

114131
void test_pending_purge(int wnd, int num_pend_sends);
115132

133+
void test_flush_fc_disabled();
134+
116135
protected:
117-
static int m_req_count;
118136
static const uint8_t FLUSH_AM_ID = 1;
119137
static uint32_t m_am_rx_count;
120138
};

0 commit comments

Comments
 (0)