@@ -134,6 +134,10 @@ ucs_config_field_t uct_ib_iface_config_table[] = {
134134 "Force interface to use global routing." ,
135135 ucs_offsetof (uct_ib_iface_config_t , is_global ), UCS_CONFIG_TYPE_BOOL },
136136
137+ {"FLID_ROUTE" , "y" ,
138+ "Enable FLID based routing with site-local GIDs." ,
139+ ucs_offsetof (uct_ib_iface_config_t , flid_enabled ), UCS_CONFIG_TYPE_BOOL },
140+
137141 {"SL" , "auto" ,
138142 "InfiniBand: Service level. 'auto' will select a value matching UCX_IB_AR configuration.\n"
139143 "RoCEv2: Ethernet Priority. 'auto' will select 0 by default." ,
@@ -342,6 +346,18 @@ size_t uct_ib_address_size(const uct_ib_address_pack_params_t *params)
342346 return size ;
343347}
344348
349+ static int uct_ib_address_gid_is_site_local (const union ibv_gid * gid )
350+ {
351+ return (gid -> global .subnet_prefix & UCT_IB_SITE_LOCAL_MASK ) ==
352+ UCT_IB_SITE_LOCAL_PREFIX ;
353+ }
354+
355+ static int uct_ib_address_gid_is_global (const union ibv_gid * gid )
356+ {
357+ return !uct_ib_address_gid_is_site_local (gid ) &&
358+ (gid -> global .subnet_prefix != UCT_IB_LINK_LOCAL_PREFIX );
359+ }
360+
345361void uct_ib_address_pack (const uct_ib_address_pack_params_t * params ,
346362 uct_ib_address_t * ib_addr )
347363{
@@ -375,14 +391,13 @@ void uct_ib_address_pack(const uct_ib_address_pack_params_t *params,
375391 }
376392
377393 if (params -> flags & UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX ) {
378- if ((params -> gid .global .subnet_prefix & UCT_IB_SITE_LOCAL_MASK ) ==
379- UCT_IB_SITE_LOCAL_PREFIX ) {
394+ if (uct_ib_address_gid_is_site_local (& params -> gid )) {
380395 /* Site-local */
381396 ib_addr -> flags |= UCT_IB_ADDRESS_FLAG_SUBNET16 ;
382397 * ucs_serialize_next (& ptr , uint16_t ) =
383398 params -> gid .global .subnet_prefix >> 48 ;
384- } else if (params -> gid . global . subnet_prefix != UCT_IB_LINK_LOCAL_PREFIX ) {
385- /* Global */
399+ } else if (uct_ib_address_gid_is_global ( & params -> gid ) ) {
400+ /* Global or site local GID with non-zero FLID */
386401 ib_addr -> flags |= UCT_IB_ADDRESS_FLAG_SUBNET64 ;
387402 * ucs_serialize_next (& ptr , uint64_t ) =
388403 params -> gid .global .subnet_prefix ;
@@ -691,6 +706,22 @@ int uct_ib_iface_is_same_device(const uct_ib_address_t *ib_addr, uint16_t dlid,
691706 (params .gid .global .interface_id == dgid -> global .interface_id );
692707}
693708
709+ static int uct_ib_iface_gid_extract_flid (const union ibv_gid * gid )
710+ {
711+ if ((gid -> global .subnet_prefix & UCT_IB_SITE_LOCAL_FLID_MASK ) !=
712+ UCT_IB_SITE_LOCAL_PREFIX ) {
713+ return 0 ;
714+ }
715+
716+ return ntohs (* ((uint16_t * )UCS_PTR_BYTE_OFFSET (gid -> raw , 4 )));
717+ }
718+
719+ static int uct_ib_iface_is_flid_enabled (const uct_ib_iface_t * iface )
720+ {
721+ return iface -> config .flid_enabled &&
722+ (uct_ib_iface_gid_extract_flid (& iface -> gid_info .gid ) != 0 );
723+ }
724+
694725static int uct_ib_iface_dev_addr_is_reachable (uct_ib_iface_t * iface ,
695726 const uct_ib_address_t * ib_addr )
696727{
@@ -707,9 +738,14 @@ static int uct_ib_iface_dev_addr_is_reachable(uct_ib_iface_t *iface,
707738 }
708739
709740 if (!is_local_eth && !(ib_addr -> flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH )) {
710- /* same subnet prefix */
711- return params .gid .global .subnet_prefix ==
712- iface -> gid_info .gid .global .subnet_prefix ;
741+ if (params .gid .global .subnet_prefix ==
742+ iface -> gid_info .gid .global .subnet_prefix ) {
743+ return 1 ;
744+ }
745+
746+ /* Check FLID route: is enabled locally, and remote GID has it */
747+ return (uct_ib_iface_is_flid_enabled (iface ) &&
748+ uct_ib_iface_gid_extract_flid (& params .gid ) != 0 );
713749 } else if (is_local_eth && (ib_addr -> flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH )) {
714750 /* there shouldn't be a lid and the UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH
715751 * flag should be on. If reachable, the remote and local RoCE versions
@@ -809,13 +845,35 @@ void uct_ib_iface_fill_ah_attr_from_gid_lid(uct_ib_iface_t *iface, uint16_t lid,
809845 uct_ib_ah_attr_str (buf , sizeof (buf ), ah_attr ));
810846}
811847
848+ static uint16_t uct_ib_gid_site_local_subnet_prefix (const union ibv_gid * gid )
849+ {
850+ return be64toh (gid -> global .subnet_prefix ) & 0xffff ;
851+ }
852+
853+ uint16_t uct_ib_iface_resolve_remote_flid (const uct_ib_iface_t * iface ,
854+ const union ibv_gid * gid )
855+ {
856+ if (!uct_ib_iface_is_flid_enabled (iface )) {
857+ return 0 ;
858+ }
859+
860+ if (uct_ib_gid_site_local_subnet_prefix (gid ) ==
861+ uct_ib_gid_site_local_subnet_prefix (& iface -> gid_info .gid )) {
862+ /* On the same subnet, no need to use FLID*/
863+ return 0 ;
864+ }
865+
866+ return uct_ib_iface_gid_extract_flid (gid );
867+ }
868+
812869void uct_ib_iface_fill_ah_attr_from_addr (uct_ib_iface_t * iface ,
813870 const uct_ib_address_t * ib_addr ,
814871 unsigned path_index ,
815872 struct ibv_ah_attr * ah_attr ,
816873 enum ibv_mtu * path_mtu )
817874{
818875 union ibv_gid * gid = NULL ;
876+ uint16_t lid , flid = 0 ;
819877 uct_ib_address_pack_params_t params ;
820878
821879 ucs_assert (!uct_ib_iface_is_roce (iface ) ==
@@ -840,12 +898,13 @@ void uct_ib_iface_fill_ah_attr_from_addr(uct_ib_iface_t *iface,
840898 UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID |
841899 UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX ) ||
842900 params .flags & UCT_IB_ADDRESS_PACK_FLAG_ETH ) {
843- gid = & params .gid ;
901+ gid = & params .gid ;
902+ flid = uct_ib_iface_resolve_remote_flid (iface , gid );
844903 }
845904
846- uct_ib_iface_fill_ah_attr_from_gid_lid ( iface , params .lid , gid ,
847- params .gid_index , path_index ,
848- ah_attr );
905+ lid = ( flid == 0 ) ? params .lid : flid ;
906+ uct_ib_iface_fill_ah_attr_from_gid_lid ( iface , lid , gid , params .gid_index ,
907+ path_index , ah_attr );
849908}
850909
851910static ucs_status_t uct_ib_iface_init_pkey (uct_ib_iface_t * iface ,
@@ -1364,6 +1423,7 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *tl_ops,
13641423 self -> config .hop_limit = config -> hop_limit ;
13651424 self -> release_desc .cb = uct_ib_iface_release_desc ;
13661425 self -> config .qp_type = init_attr -> qp_type ;
1426+ self -> config .flid_enabled = config -> flid_enabled ;
13671427 uct_ib_iface_set_path_mtu (self , config );
13681428
13691429 if (ucs_derived_of (worker , uct_priv_worker_t )-> thread_mode == UCS_THREAD_MODE_MULTI ) {
@@ -1438,6 +1498,7 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_iface_ops_t *tl_ops,
14381498 /* Address scope and size */
14391499 if (uct_ib_iface_is_roce (self ) || config -> is_global ||
14401500 uct_ib_grh_required (uct_ib_iface_port_attr (self )) ||
1501+ uct_ib_address_gid_is_global (& self -> gid_info .gid ) ||
14411502 /* check ADDR_TYPE for backward compatibility */
14421503 (config -> addr_type == UCT_IB_ADDRESS_TYPE_SITE_LOCAL ) ||
14431504 (config -> addr_type == UCT_IB_ADDRESS_TYPE_GLOBAL )) {
0 commit comments