Skip to content

Commit 865440e

Browse files
authored
Merge pull request #10364 from roiedanino/v1.18.x/ddp-devx
v1.18.x - UCT/IB/MLX5: Add DDP support for DevX
2 parents b13e930 + 71b95ff commit 865440e

File tree

14 files changed

+231
-118
lines changed

14 files changed

+231
-118
lines changed

src/ucs/sys/compiler_def.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@
8080
/* Mask of bits 0..i-1 */
8181
#define UCS_MASK(_i) (((_i) >= 64) ? ~0 : (UCS_BIT(_i) - 1))
8282

83+
/* Value (0 or 1) of the i-th bit of _value */
84+
#define UCS_BIT_GET(_value, _i) (!!((_value) & UCS_BIT(_i)))
85+
8386
/*
8487
* Enable compiler checks for printf-like formatting.
8588
*

src/uct/ib/base/ib_iface.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,9 @@ enum {
239239
/* Indicates that TX cq len in uct_ib_iface_init_attr_t is specified per
240240
* each IB path. Therefore IB interface constructor would need to multiply
241241
* TX CQ len by the number of IB paths (when it is properly initialized). */
242-
UCT_IB_TX_OPS_PER_PATH = UCS_BIT(2)
242+
UCT_IB_TX_OPS_PER_PATH = UCS_BIT(2),
243+
/* Whether both the device and the transport support DDP */
244+
UCT_IB_DDP_SUPPORTED = UCS_BIT(3)
243245
};
244246

245247

@@ -345,7 +347,6 @@ struct uct_ib_iface {
345347
enum ibv_mtu path_mtu;
346348
uint8_t counter_set_id;
347349
uct_ib_iface_send_overhead_t send_overhead;
348-
ucs_ternary_auto_value_t dp_ordering_ooo; /* Activate RW OOO */
349350
} config;
350351

351352
uct_ib_iface_ops_t *ops;

src/uct/ib/mlx5/dc/dc_mlx5.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1610,6 +1610,10 @@ static UCS_CLASS_INIT_FUNC(uct_dc_mlx5_iface_t, uct_md_h tl_md, uct_worker_h wor
16101610
init_attr.flags |= UCT_IB_TM_SUPPORTED;
16111611
}
16121612

1613+
if (md->dp_ordering_cap.dc == UCT_IB_MLX5_DP_ORDERING_OOO_ALL) {
1614+
init_attr.flags |= UCT_IB_DDP_SUPPORTED;
1615+
}
1616+
16131617
status = uct_dc_mlx5_calc_sq_length(md, tx_queue_len, &sq_length);
16141618
if (status != UCS_OK) {
16151619
return status;
@@ -1623,9 +1627,9 @@ static UCS_CLASS_INIT_FUNC(uct_dc_mlx5_iface_t, uct_md_h tl_md, uct_worker_h wor
16231627
tl_md, worker, params, &config->super,
16241628
&config->rc_mlx5_common, &init_attr);
16251629

1626-
status = uct_rc_mlx5_dp_ordering_ooo_init(
1627-
&self->super, UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_DC,
1628-
&config->rc_mlx5_common, "dc");
1630+
status = uct_rc_mlx5_dp_ordering_ooo_init(&self->super,
1631+
md->dp_ordering_cap.dc,
1632+
&config->rc_mlx5_common, "dc");
16291633
if (status != UCS_OK) {
16301634
return status;
16311635
}

src/uct/ib/mlx5/dc/dc_mlx5_devx.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,14 @@ ucs_status_t uct_dc_mlx5_iface_devx_create_dct(uct_dc_mlx5_iface_t *iface)
4949
ib_iface->config.max_inl_cqe[UCT_IB_DIR_RX], 1));
5050
UCT_IB_MLX5DV_SET(dctc, dctc, atomic_mode,
5151
uct_ib_mlx5_get_atomic_mode(ib_iface));
52-
if (uct_ib_iface_is_roce(&iface->super.super.super)) {
53-
UCT_IB_MLX5DV_SET(dctc, dctc, dp_ordering_0,
54-
ucs_ternary_auto_value_is_yes_or_try(
55-
ib_iface->config.dp_ordering_ooo));
56-
UCT_IB_MLX5DV_SET(dctc, dctc, dp_ordering_1, 0);
57-
UCT_IB_MLX5DV_SET(dctc, dctc, dp_ordering_force,
58-
ucs_ternary_auto_value_is_yes_or_no(
59-
ib_iface->config.dp_ordering_ooo));
60-
} else {
52+
UCT_IB_MLX5DV_SET(dctc, dctc, dp_ordering_0,
53+
UCS_BIT_GET(iface->super.config.dp_ordering, 0));
54+
UCT_IB_MLX5DV_SET(dctc, dctc, dp_ordering_1,
55+
UCS_BIT_GET(iface->super.config.dp_ordering, 1));
56+
UCT_IB_MLX5DV_SET(dctc, dctc, dp_ordering_force,
57+
iface->super.config.dp_ordering_force);
58+
59+
if (!uct_ib_iface_is_roce(&iface->super.super.super)) {
6160
UCT_IB_MLX5DV_SET(dctc, dctc, pkey_index, ib_iface->pkey_index);
6261
}
6362

@@ -145,16 +144,16 @@ ucs_status_t uct_dc_mlx5_iface_devx_dci_connect(uct_dc_mlx5_iface_t *iface,
145144
UCT_IB_MLX5DV_SET(qpc, qpc, atomic_mode,
146145
uct_ib_mlx5_get_atomic_mode(&rc_iface->super));
147146
UCT_IB_MLX5DV_SET(qpc, qpc, rae, true);
147+
148+
uct_ib_mlx5_devx_set_qpc_dp_ordering(md, qpc, &iface->super);
149+
148150
if (uct_ib_iface_is_roce(&rc_iface->super)) {
149151
UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.eth_prio,
150152
rc_iface->super.config.sl);
151153
if (iface->tx.port_affinity) {
152154
uct_ib_mlx5_devx_set_qpc_port_affinity(md, dci_config->path_index,
153155
qpc, &opt_param_mask);
154156
}
155-
156-
uct_ib_mlx5_devx_set_qpc_dp_ordering(
157-
qpc, rc_iface->super.config.dp_ordering_ooo);
158157
} else {
159158
UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.sl,
160159
rc_iface->super.config.sl);

src/uct/ib/mlx5/dv/ib_mlx5_dv.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -462,16 +462,6 @@ ucs_status_t uct_ib_mlx5_devx_query_ooo_sl_mask(uct_ib_mlx5_md_t *md,
462462
return UCS_OK;
463463
}
464464

465-
void uct_ib_mlx5_devx_set_qpc_dp_ordering(
466-
void *qpc, ucs_ternary_auto_value_t dp_ordering_ooo)
467-
{
468-
UCT_IB_MLX5DV_SET(qpc, qpc, dp_ordering_0,
469-
ucs_ternary_auto_value_is_yes_or_try(dp_ordering_ooo));
470-
UCT_IB_MLX5DV_SET(qpc, qpc, dp_ordering_1, 0);
471-
UCT_IB_MLX5DV_SET(qpc, qpc, dp_ordering_force,
472-
ucs_ternary_auto_value_is_yes_or_no(dp_ordering_ooo));
473-
}
474-
475465
void uct_ib_mlx5_devx_set_qpc_port_affinity(uct_ib_mlx5_md_t *md,
476466
uint8_t path_index, void *qpc,
477467
uint32_t *opt_param_mask)

src/uct/ib/mlx5/dv/ib_mlx5_ifc.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,12 @@ struct uct_ib_mlx5_cmd_hca_cap_bits {
323323
uint8_t log_max_transport_domain[0x5];
324324
uint8_t reserved_at_328[0x3];
325325
uint8_t log_max_pd[0x5];
326-
uint8_t reserved_at_330[0xb];
326+
uint8_t dp_ordering_ooo_all_ud[0x1];
327+
uint8_t dp_ordering_ooo_all_uc[0x1];
328+
uint8_t dp_ordering_ooo_all_xrc[0x1];
329+
uint8_t dp_ordering_ooo_all_dc[0x1];
330+
uint8_t dp_ordering_ooo_all_rc[0x1];
331+
uint8_t reserved_at_335[0x6];
327332
uint8_t log_max_xrcd[0x5];
328333

329334
uint8_t nic_receive_steering_discard[0x1];

src/uct/ib/mlx5/dv/ib_mlx5dv_md.c

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,12 +1883,20 @@ static void uct_ib_mlx5_devx_check_dp_ordering(uct_ib_mlx5_md_t *md, void *cap,
18831883
void *cap_2,
18841884
uct_ib_device_t *dev)
18851885
{
1886-
if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, dp_ordering_ooo_rw_rc)) {
1887-
md->flags |= UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_RC;
1886+
if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, dp_ordering_ooo_all_rc)) {
1887+
md->dp_ordering_cap.rc = UCT_IB_MLX5_DP_ORDERING_OOO_ALL;
1888+
} else if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, dp_ordering_ooo_rw_rc)) {
1889+
md->dp_ordering_cap.rc = UCT_IB_MLX5_DP_ORDERING_OOO_RW;
1890+
} else {
1891+
md->dp_ordering_cap.rc = UCT_IB_MLX5_DP_ORDERING_IBTA;
18881892
}
18891893

1890-
if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, dp_ordering_ooo_rw_dc)) {
1891-
md->flags |= UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_DC;
1894+
if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, dp_ordering_ooo_all_dc)) {
1895+
md->dp_ordering_cap.dc = UCT_IB_MLX5_DP_ORDERING_OOO_ALL;
1896+
} else if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, dp_ordering_ooo_rw_dc)) {
1897+
md->dp_ordering_cap.dc = UCT_IB_MLX5_DP_ORDERING_OOO_RW;
1898+
} else {
1899+
md->dp_ordering_cap.dc = UCT_IB_MLX5_DP_ORDERING_IBTA;
18921900
}
18931901

18941902
if ((cap_2 != NULL) &&
@@ -1899,8 +1907,7 @@ static void uct_ib_mlx5_devx_check_dp_ordering(uct_ib_mlx5_md_t *md, void *cap,
18991907
ucs_debug("%s: dp_ordering support: force=%d ooo_rw_rc=%d ooo_rw_dc=%d",
19001908
uct_ib_device_name(dev),
19011909
!!(md->flags & UCT_IB_MLX5_MD_FLAG_DP_ORDERING_FORCE),
1902-
!!(md->flags & UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_RC),
1903-
!!(md->flags & UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_DC));
1910+
md->dp_ordering_cap.rc, md->dp_ordering_cap.dc);
19041911
}
19051912

19061913
static void uct_ib_mlx5_devx_check_mkey_by_name(uct_ib_mlx5_md_t *md,
@@ -3153,7 +3160,8 @@ static void uct_ib_mlx5dv_check_dc(uct_ib_device_t *dev)
31533160

31543161
static uct_ib_md_ops_t uct_ib_mlx5_md_ops;
31553162

3156-
static void uct_ib_mlx5dv_query_ddp(struct ibv_context *ctx, uct_ib_mlx5_md_t *md)
3163+
static ucs_status_t
3164+
uct_ib_mlx5dv_check_ddp(struct ibv_context *ctx, uct_ib_mlx5_md_t *md)
31573165
{
31583166
#ifdef HAVE_OOO_RECV_WRS
31593167
struct mlx5dv_context ctx_dv = {
@@ -3164,13 +3172,21 @@ static void uct_ib_mlx5dv_query_ddp(struct ibv_context *ctx, uct_ib_mlx5_md_t *m
31643172
ret = mlx5dv_query_device(ctx, &ctx_dv);
31653173
if (ret != 0) {
31663174
ucs_error("mlx5dv_query_device: Failed to query device capabilities, ret=%d\n", ret);
3167-
return;
3175+
return UCS_ERR_NO_RESOURCE;
31683176
}
31693177

31703178
if (ctx_dv.ooo_recv_wrs_caps.max_rc > 0) {
3171-
md->flags |= UCT_IB_MLX5_MD_FLAG_DDP;
3179+
md->dp_ordering_cap.rc = UCT_IB_MLX5_DP_ORDERING_OOO_ALL;
3180+
}
3181+
3182+
if (ctx_dv.ooo_recv_wrs_caps.max_dct > 0) {
3183+
md->dp_ordering_cap.dc = UCT_IB_MLX5_DP_ORDERING_OOO_ALL;
31723184
}
3185+
#else
3186+
md->dp_ordering_cap.rc = UCT_IB_MLX5_DP_ORDERING_IBTA;
3187+
md->dp_ordering_cap.dc = UCT_IB_MLX5_DP_ORDERING_IBTA;
31733188
#endif
3189+
return UCS_OK;
31743190
}
31753191

31763192
static ucs_status_t uct_ib_mlx5dv_md_open(struct ibv_device *ibv_device,
@@ -3208,7 +3224,10 @@ static ucs_status_t uct_ib_mlx5dv_md_open(struct ibv_device *ibv_device,
32083224
goto err_md_free;
32093225
}
32103226

3211-
uct_ib_mlx5dv_query_ddp(ctx, md);
3227+
status = uct_ib_mlx5dv_check_ddp(ctx, md);
3228+
if (status != UCS_OK) {
3229+
goto err_md_free;
3230+
}
32123231

32133232
if (IBV_DEVICE_ATOMIC_HCA(dev)) {
32143233
dev->atomic_arg_sizes = sizeof(uint64_t);

src/uct/ib/mlx5/gga/gga_mlx5.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,8 @@ static UCS_CLASS_INIT_FUNC(uct_gga_mlx5_iface_t,
663663
&init_attr);
664664

665665
status = uct_rc_mlx5_dp_ordering_ooo_init(
666-
&self->super, UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_RC,
666+
&self->super,
667+
ucs_min(md->dp_ordering_cap.rc, UCT_IB_MLX5_DP_ORDERING_OOO_RW),
667668
&config->rc_mlx5_common, "gga");
668669
if (status != UCS_OK) {
669670
return status;

src/uct/ib/mlx5/ib_mlx5.h

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -204,17 +204,11 @@ enum {
204204
UCT_IB_MLX5_MD_FLAG_UAR_USE_WC = UCS_BIT(17),
205205
/* Device supports implicit ODP with PCI relaxed order */
206206
UCT_IB_MLX5_MD_FLAG_GVA_RO = UCS_BIT(18),
207-
/* RoCE supports out-of-order RDMA for RC */
208-
UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_RC = UCS_BIT(19),
209-
/* RoCE supports out-of-order RDMA for DC */
210-
UCT_IB_MLX5_MD_FLAG_DP_ORDERING_OOO_RW_DC = UCS_BIT(20),
211-
/* RoCE supports forcing ordering configuration */
212-
UCT_IB_MLX5_MD_FLAG_DP_ORDERING_FORCE = UCS_BIT(21),
213-
/* Device supports DDP (OOO data placement)*/
214-
UCT_IB_MLX5_MD_FLAG_DDP = UCS_BIT(22),
207+
/* Device supports forcing ordering configuration */
208+
UCT_IB_MLX5_MD_FLAG_DP_ORDERING_FORCE = UCS_BIT(19),
215209

216210
/* Object to be created by DevX */
217-
UCT_IB_MLX5_MD_FLAG_DEVX_OBJS_SHIFT = 23,
211+
UCT_IB_MLX5_MD_FLAG_DEVX_OBJS_SHIFT = 20,
218212
UCT_IB_MLX5_MD_FLAG_DEVX_RC_QP = UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(RCQP),
219213
UCT_IB_MLX5_MD_FLAG_DEVX_RC_SRQ = UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(RCSRQ),
220214
UCT_IB_MLX5_MD_FLAG_DEVX_DCT = UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(DCT),
@@ -386,6 +380,16 @@ KHASH_MAP_INIT_INT(rkeys, uct_ib_mlx5_mem_lru_entry_t*);
386380
#endif
387381

388382

383+
typedef enum {
384+
/* IBTA-compliant ordering semantics */
385+
UCT_IB_MLX5_DP_ORDERING_IBTA = 0x0,
386+
/* Out-of-order RDMA reads and writes */
387+
UCT_IB_MLX5_DP_ORDERING_OOO_RW = 0x1,
388+
/* Out-of-order RDMA read/write/send/recv (DDP) */
389+
UCT_IB_MLX5_DP_ORDERING_OOO_ALL = 0x2,
390+
} uct_ib_mlx5_dp_ordering_t;
391+
392+
389393
/**
390394
* MLX5 IB memory domain.
391395
*/
@@ -431,6 +435,12 @@ typedef struct uct_ib_mlx5_md {
431435
uint8_t max_rd_atomic_dc;
432436
uint8_t log_max_dci_stream_channels;
433437
uint32_t smkey_index;
438+
struct {
439+
/* Max dp ordering level per transport,
440+
as listed in uct_ib_mlx5_dp_ordering_t */
441+
uint8_t rc;
442+
uint8_t dc;
443+
} dp_ordering_cap;
434444
} uct_ib_mlx5_md_t;
435445

436446

@@ -986,9 +996,6 @@ ucs_status_t uct_ib_mlx5_devx_query_ooo_sl_mask(uct_ib_mlx5_md_t *md,
986996
uint8_t port_num,
987997
uint16_t *ooo_sl_mask_p);
988998

989-
void uct_ib_mlx5_devx_set_qpc_dp_ordering(
990-
void *qpc, ucs_ternary_auto_value_t dp_ordering_ooo);
991-
992999
void uct_ib_mlx5_devx_set_qpc_port_affinity(uct_ib_mlx5_md_t *md,
9931000
uint8_t path_index, void *qpc,
9941001
uint32_t *opt_param_mask);

0 commit comments

Comments
 (0)