Skip to content

Commit 76758f8

Browse files
authored
Merge pull request #9557 from yosefe/topic/uct-ib-add-flid-based-routing-support-v1.16.x
UCT/IB: Add FLID based routing support - v1.16.x
2 parents 4efdff9 + 460afd4 commit 76758f8

File tree

5 files changed

+88
-13
lines changed

5 files changed

+88
-13
lines changed

src/uct/ib/base/ib_device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#define UCT_IB_LINK_LOCAL_PREFIX be64toh(0xfe80000000000000ul) /* IBTA 4.1.1 12a */
5353
#define UCT_IB_SITE_LOCAL_PREFIX be64toh(0xfec0000000000000ul) /* IBTA 4.1.1 12b */
5454
#define UCT_IB_SITE_LOCAL_MASK be64toh(0xffffffffffff0000ul) /* IBTA 4.1.1 12b */
55+
#define UCT_IB_SITE_LOCAL_FLID_MASK be64toh(0xffffffff00000000ul) /* site-local + flid */
5556
#define UCT_IB_DEFAULT_ROCEV2_DSCP 106 /* Default DSCP for RoCE v2 */
5657
#define UCT_IB_ROCE_UDP_SRC_PORT_BASE 0xC000
5758
#define UCT_IB_CQE_SL_PKTYPE_MASK 0x7 /* SL for IB or packet type

src/uct/ib/base/ib_iface.c

Lines changed: 72 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@ ucs_config_field_t uct_ib_iface_config_table[] = {
134134
"Force interface to use global routing.",
135135
ucs_offsetof(uct_ib_iface_config_t, is_global), UCS_CONFIG_TYPE_BOOL},
136136

137+
{"FLID_ROUTE", "y",
138+
"Enable FLID based routing with site-local GIDs.",
139+
ucs_offsetof(uct_ib_iface_config_t, flid_enabled), UCS_CONFIG_TYPE_BOOL},
140+
137141
{"SL", "auto",
138142
"InfiniBand: Service level. 'auto' will select a value matching UCX_IB_AR configuration.\n"
139143
"RoCEv2: Ethernet Priority. 'auto' will select 0 by default.",
@@ -342,6 +346,18 @@ size_t uct_ib_address_size(const uct_ib_address_pack_params_t *params)
342346
return size;
343347
}
344348

349+
static int uct_ib_address_gid_is_site_local(const union ibv_gid *gid)
350+
{
351+
return (gid->global.subnet_prefix & UCT_IB_SITE_LOCAL_MASK) ==
352+
UCT_IB_SITE_LOCAL_PREFIX;
353+
}
354+
355+
static int uct_ib_address_gid_is_global(const union ibv_gid *gid)
356+
{
357+
return !uct_ib_address_gid_is_site_local(gid) &&
358+
(gid->global.subnet_prefix != UCT_IB_LINK_LOCAL_PREFIX);
359+
}
360+
345361
void uct_ib_address_pack(const uct_ib_address_pack_params_t *params,
346362
uct_ib_address_t *ib_addr)
347363
{
@@ -375,14 +391,13 @@ void uct_ib_address_pack(const uct_ib_address_pack_params_t *params,
375391
}
376392

377393
if (params->flags & UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX) {
378-
if ((params->gid.global.subnet_prefix & UCT_IB_SITE_LOCAL_MASK) ==
379-
UCT_IB_SITE_LOCAL_PREFIX) {
394+
if (uct_ib_address_gid_is_site_local(&params->gid)) {
380395
/* Site-local */
381396
ib_addr->flags |= UCT_IB_ADDRESS_FLAG_SUBNET16;
382397
*ucs_serialize_next(&ptr, uint16_t) =
383398
params->gid.global.subnet_prefix >> 48;
384-
} else if (params->gid.global.subnet_prefix != UCT_IB_LINK_LOCAL_PREFIX) {
385-
/* Global */
399+
} else if (uct_ib_address_gid_is_global(&params->gid)) {
400+
/* Global or site local GID with non-zero FLID */
386401
ib_addr->flags |= UCT_IB_ADDRESS_FLAG_SUBNET64;
387402
*ucs_serialize_next(&ptr, uint64_t) =
388403
params->gid.global.subnet_prefix;
@@ -691,6 +706,22 @@ int uct_ib_iface_is_same_device(const uct_ib_address_t *ib_addr, uint16_t dlid,
691706
(params.gid.global.interface_id == dgid->global.interface_id);
692707
}
693708

709+
static int uct_ib_iface_gid_extract_flid(const union ibv_gid *gid)
710+
{
711+
if ((gid->global.subnet_prefix & UCT_IB_SITE_LOCAL_FLID_MASK) !=
712+
UCT_IB_SITE_LOCAL_PREFIX) {
713+
return 0;
714+
}
715+
716+
return ntohs(*((uint16_t*)UCS_PTR_BYTE_OFFSET(gid->raw, 4)));
717+
}
718+
719+
static int uct_ib_iface_is_flid_enabled(const uct_ib_iface_t *iface)
720+
{
721+
return iface->config.flid_enabled &&
722+
(uct_ib_iface_gid_extract_flid(&iface->gid_info.gid) != 0);
723+
}
724+
694725
static int uct_ib_iface_dev_addr_is_reachable(uct_ib_iface_t *iface,
695726
const uct_ib_address_t *ib_addr)
696727
{
@@ -707,9 +738,14 @@ static int uct_ib_iface_dev_addr_is_reachable(uct_ib_iface_t *iface,
707738
}
708739

709740
if (!is_local_eth && !(ib_addr->flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH)) {
710-
/* same subnet prefix */
711-
return params.gid.global.subnet_prefix ==
712-
iface->gid_info.gid.global.subnet_prefix;
741+
if (params.gid.global.subnet_prefix ==
742+
iface->gid_info.gid.global.subnet_prefix) {
743+
return 1;
744+
}
745+
746+
/* Check FLID route: is enabled locally, and remote GID has it */
747+
return (uct_ib_iface_is_flid_enabled(iface) &&
748+
uct_ib_iface_gid_extract_flid(&params.gid) != 0);
713749
} else if (is_local_eth && (ib_addr->flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH)) {
714750
/* there shouldn't be a lid and the UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH
715751
* flag should be on. If reachable, the remote and local RoCE versions
@@ -809,13 +845,35 @@ void uct_ib_iface_fill_ah_attr_from_gid_lid(uct_ib_iface_t *iface, uint16_t lid,
809845
uct_ib_ah_attr_str(buf, sizeof(buf), ah_attr));
810846
}
811847

848+
static uint16_t uct_ib_gid_site_local_subnet_prefix(const union ibv_gid *gid)
849+
{
850+
return be64toh(gid->global.subnet_prefix) & 0xffff;
851+
}
852+
853+
uint16_t uct_ib_iface_resolve_remote_flid(const uct_ib_iface_t *iface,
854+
const union ibv_gid *gid)
855+
{
856+
if (!uct_ib_iface_is_flid_enabled(iface)) {
857+
return 0;
858+
}
859+
860+
if (uct_ib_gid_site_local_subnet_prefix(gid) ==
861+
uct_ib_gid_site_local_subnet_prefix(&iface->gid_info.gid)) {
862+
/* On the same subnet, no need to use FLID*/
863+
return 0;
864+
}
865+
866+
return uct_ib_iface_gid_extract_flid(gid);
867+
}
868+
812869
void uct_ib_iface_fill_ah_attr_from_addr(uct_ib_iface_t *iface,
813870
const uct_ib_address_t *ib_addr,
814871
unsigned path_index,
815872
struct ibv_ah_attr *ah_attr,
816873
enum ibv_mtu *path_mtu)
817874
{
818875
union ibv_gid *gid = NULL;
876+
uint16_t lid, flid = 0;
819877
uct_ib_address_pack_params_t params;
820878

821879
ucs_assert(!uct_ib_iface_is_roce(iface) ==
@@ -840,12 +898,13 @@ void uct_ib_iface_fill_ah_attr_from_addr(uct_ib_iface_t *iface,
840898
UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID |
841899
UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX) ||
842900
params.flags & UCT_IB_ADDRESS_PACK_FLAG_ETH) {
843-
gid = &params.gid;
901+
gid = &params.gid;
902+
flid = uct_ib_iface_resolve_remote_flid(iface, gid);
844903
}
845904

846-
uct_ib_iface_fill_ah_attr_from_gid_lid(iface, params.lid, gid,
847-
params.gid_index, path_index,
848-
ah_attr);
905+
lid = (flid == 0) ? params.lid : flid;
906+
uct_ib_iface_fill_ah_attr_from_gid_lid(iface, lid, gid, params.gid_index,
907+
path_index, ah_attr);
849908
}
850909

851910
static ucs_status_t uct_ib_iface_init_pkey(uct_ib_iface_t *iface,
@@ -1364,6 +1423,7 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *tl_ops,
13641423
self->config.hop_limit = config->hop_limit;
13651424
self->release_desc.cb = uct_ib_iface_release_desc;
13661425
self->config.qp_type = init_attr->qp_type;
1426+
self->config.flid_enabled = config->flid_enabled;
13671427
uct_ib_iface_set_path_mtu(self, config);
13681428

13691429
if (ucs_derived_of(worker, uct_priv_worker_t)->thread_mode == UCS_THREAD_MODE_MULTI) {
@@ -1438,6 +1498,7 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *tl_ops,
14381498
/* Address scope and size */
14391499
if (uct_ib_iface_is_roce(self) || config->is_global ||
14401500
uct_ib_grh_required(uct_ib_iface_port_attr(self)) ||
1501+
uct_ib_address_gid_is_global(&self->gid_info.gid) ||
14411502
/* check ADDR_TYPE for backward compatibility */
14421503
(config->addr_type == UCT_IB_ADDRESS_TYPE_SITE_LOCAL) ||
14431504
(config->addr_type == UCT_IB_ADDRESS_TYPE_GLOBAL)) {

src/uct/ib/base/ib_iface.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ struct uct_ib_iface_config {
157157
/* Force global routing */
158158
int is_global;
159159

160+
/* Use FLID based routing */
161+
int flid_enabled;
162+
160163
/* IB SL to use (default: AUTO) */
161164
unsigned long sl;
162165

@@ -296,6 +299,7 @@ struct uct_ib_iface {
296299
uint8_t hop_limit;
297300
uint8_t qp_type;
298301
uint8_t force_global_addr;
302+
uint8_t flid_enabled;
299303
enum ibv_mtu path_mtu;
300304
uint8_t counter_set_id;
301305
} config;
@@ -578,6 +582,9 @@ void uct_ib_iface_fill_attr(uct_ib_iface_t *iface,
578582

579583
uint8_t uct_ib_iface_config_select_sl(const uct_ib_iface_config_t *ib_config);
580584

585+
uint16_t uct_ib_iface_resolve_remote_flid(const uct_ib_iface_t *iface,
586+
const union ibv_gid *gid);
587+
581588
#define UCT_IB_IFACE_FMT \
582589
"%s:%d/%s"
583590
#define UCT_IB_IFACE_ARG(_iface) \

src/uct/ib/dc/dc_mlx5.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,9 +1153,10 @@ uct_dc_mlx5_iface_fc_handler(uct_rc_iface_t *rc_iface, unsigned qp_num,
11531153
{
11541154
uct_dc_mlx5_iface_t *iface = ucs_derived_of(rc_iface, uct_dc_mlx5_iface_t);
11551155
uint8_t fc_hdr = uct_rc_fc_get_fc_hdr(hdr->am_id);
1156+
const union ibv_gid *gid;
11561157
uct_dc_fc_sender_data_t *sender;
11571158
uct_dc_fc_request_t *dc_req;
1158-
int16_t cur_wnd;
1159+
int16_t cur_wnd, flid;
11591160
ucs_status_t status;
11601161
uct_dc_mlx5_ep_t *ep;
11611162
ucs_arbiter_t *waitq;
@@ -1178,9 +1179,12 @@ uct_dc_mlx5_iface_fc_handler(uct_rc_iface_t *rc_iface, unsigned qp_num,
11781179
dc_req->super.super.func = uct_dc_mlx5_iface_fc_grant;
11791180
dc_req->super.ep = &ep->super.super;
11801181
dc_req->dct_num = imm_data;
1181-
dc_req->lid = lid;
11821182
dc_req->sender = *((uct_dc_fc_sender_data_t*)(hdr + 1));
11831183

1184+
gid = ucs_unaligned_ptr(&dc_req->sender.payload.gid);
1185+
flid = uct_ib_iface_resolve_remote_flid(&rc_iface->super, gid);
1186+
dc_req->lid = (flid == 0) ? lid : htons(flid); /* dc_req->lid is BE */
1187+
11841188
status = uct_dc_mlx5_iface_fc_grant(&dc_req->super.super);
11851189
if (status == UCS_ERR_NO_RESOURCE){
11861190
uct_dc_mlx5_ep_do_pending_fc(ep, dc_req);

src/uct/ib/rc/accel/rc_mlx5_devx.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,8 @@ ucs_status_t uct_rc_mlx5_iface_common_devx_connect_qp(
445445
iface->super.super.config.sl);
446446

447447
if (ah_attr->is_global) {
448+
UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.src_addr_index,
449+
ah_attr->grh.sgid_index);
448450
UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.hop_limit,
449451
ah_attr->grh.hop_limit);
450452
memcpy(UCT_IB_MLX5DV_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),

0 commit comments

Comments
 (0)