@@ -1231,16 +1231,21 @@ ucp_wireup_iface_avail_bandwidth(const ucp_worker_iface_t *wiface,
12311231 double eps = 1e-3 ;
12321232 double local_bw , remote_bw ;
12331233
1234- local_bw = ucp_wireup_iface_bw_distance (wiface ) *
1235- ucp_tl_iface_bandwidth_ratio (context , local_dev_count [dev_index ],
1236- wiface -> attr .dev_num_paths );
1234+ local_bw = ucp_wireup_iface_bw_distance (wiface );
12371235
12381236 if (remote_addr -> iface_attr .addr_version == UCP_OBJECT_VERSION_V2 ) {
12391237 /* FP8 is a lossy compression method, so in order to create a symmetric
12401238 * calculation we pack/unpack the local bandwidth as well */
12411239 local_bw = ucp_wireup_fp8_pack_unpack_bw (local_bw );
12421240 }
12431241
1242+ /* Apply dev num paths ratio after fp8 pack/unpack to make sure it is not
1243+ * neglected because of fp8 inaccuracy
1244+ */
1245+ local_bw *= ucp_tl_iface_bandwidth_ratio (
1246+ context , local_dev_count [dev_index ],
1247+ wiface -> attr .dev_num_paths );
1248+
12441249 remote_bw = remote_addr -> iface_attr .bandwidth *
12451250 ucp_tl_iface_bandwidth_ratio (
12461251 context , remote_dev_count [remote_addr -> dev_index ],
@@ -1412,15 +1417,23 @@ ucp_wireup_is_md_map_count_valid(ucp_context_h context, ucp_md_map_t md_map)
14121417
/*
 * Return the bandwidth value for a selected lane: the smaller of the local
 * interface bandwidth and the bandwidth stored in the remote address entry.
 *
 * worker        Worker whose context is used and whose interface attributes
 *               are looked up by sinfo->rsc_index.
 * sinfo         Selection info; rsc_index picks the local interface and
 *               addr_index picks the remote address entry.
 * address_list  Array of remote address entries, indexed by
 *               sinfo->addr_index.
 *
 * The local value comes from ucp_tl_iface_bandwidth() applied to the local
 * interface's bandwidth attribute. When the chosen remote entry has
 * addr_version == UCP_OBJECT_VERSION_V2, the local value is additionally
 * passed through ucp_wireup_fp8_pack_unpack_bw() before the comparison.
 *
 * Returns ucs_min(bw_local, bw_remote).
 *
 * NOTE(review): this listing is a diff view - lines marked '-' show the
 * replaced code (the third parameter was previously named `address` and was
 * indexed directly), lines marked '+' show the current code.
 */
14131418static double ucp_wireup_get_lane_bw (ucp_worker_h worker ,
14141419 const ucp_wireup_select_info_t * sinfo ,
1415- const ucp_address_entry_t * address )
1420+ const ucp_address_entry_t * address_list )
14161421{
14171422 ucp_context_h context = worker -> context ;
14181423 const uct_iface_attr_t * iface_attr ;
1424+ const ucp_address_entry_t * address ;
14191425 double bw_local , bw_remote ;
14201426
14211427 iface_attr = ucp_worker_iface_get_attr (worker , sinfo -> rsc_index );
14221428 bw_local = ucp_tl_iface_bandwidth (context , & iface_attr -> bandwidth );
1423- bw_remote = address [sinfo -> addr_index ].iface_attr .bandwidth ;
1429+ address = & address_list [sinfo -> addr_index ];
1430+ bw_remote = address -> iface_attr .bandwidth ;
1431+
1432+ if (address -> iface_attr .addr_version == UCP_OBJECT_VERSION_V2 ) {
1433+ /* FP8 is a lossy compression method, so in order to create a symmetric
1434+ * calculation we pack/unpack the local bandwidth as well */
1435+ bw_local = ucp_wireup_fp8_pack_unpack_bw (bw_local );
1436+ }
14241437
14251438 return ucs_min (bw_local , bw_remote );
14261439}
0 commit comments