Skip to content

Commit d218488

Browse files
committed
prov/udp: detect and use MTU to set max_msg_size and inject_size
For each interface detected by the udp provider, determine the MTU of the interface, and use that value to set the max_msg_size field of fi_ep_attr and the inject_size field of fi_tx_attr in the corresponding fi_info element. When the MTU cannot be determined, the MTU value assumed by previous code versions (1500) is used. Signed-off-by: Martin Pokorny <[email protected]>
1 parent f236201 commit d218488

File tree

12 files changed

+150
-21
lines changed

12 files changed

+150
-21
lines changed

include/freebsd/osd.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
7676
return 0;
7777
}
7878

79+
/*
 * FreeBSD placeholder for the interface MTU lookup: the argument is not
 * examined and -1 is always returned, i.e. no MTU is reported.
 */
static inline int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa)
80+
{
81+
return -1;
82+
}
83+
7984
static inline ssize_t ofi_process_vm_readv(pid_t pid,
8085
const struct iovec *local_iov,
8186
unsigned long liovcnt,
@@ -185,5 +190,3 @@ ofi_recvv_socket(SOCKET fd, const struct iovec *iov, size_t cnt, int flags)
185190
}
186191

187192
#endif /* _FREEBSD_OSD_H_ */
188-
189-

include/linux/osd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ static inline int ofi_hugepage_enabled(void)
9292

9393
size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa);
9494

95+
int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa);
96+
9597
#ifndef __NR_process_vm_readv
9698
# define __NR_process_vm_readv 310
9799
#endif

include/ofi_net.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,7 @@ struct ofi_addr_list_entry {
655655
char ipstr[INET6_ADDRSTRLEN];
656656
union ofi_sock_ip ipaddr;
657657
size_t speed;
658+
int mtu;
658659
char net_name[OFI_ADDRSTRLEN];
659660
char ifa_name[OFI_ADDRSTRLEN];
660661
uint64_t comm_caps;

include/osx/osd.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ static inline size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa)
9999
return 0;
100100
}
101101

102+
/*
 * macOS placeholder for the interface MTU lookup: the argument is not
 * examined and -1 is always returned, i.e. no MTU is reported.
 */
static inline int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa)
103+
{
104+
return -1;
105+
}
106+
102107
static inline int ofi_hugepage_enabled(void)
103108
{
104109
return 0;

include/windows/ifaddrs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ struct ifaddrs {
3434

3535
char ad_name[16];
3636
size_t speed;
37+
int mtu;
3738
};
3839

3940
int getifaddrs(struct ifaddrs **ifap);
4041
void freeifaddrs(struct ifaddrs *ifa);
41-

include/windows/osd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,8 @@ static inline int ofi_is_loopback_addr(struct sockaddr *addr) {
10061006

10071007
size_t ofi_ifaddr_get_speed(struct ifaddrs *ifa);
10081008

1009+
int ofi_ifaddr_get_mtu(const struct ifaddrs *ifa);
1010+
10091011
#define file2unix_time 10000000i64
10101012
#define win2unix_epoch 116444736000000000i64
10111013
#define CLOCK_REALTIME 0

prov/udp/src/udpx.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,22 +63,22 @@
6363
#ifndef _UDPX_H_
6464
#define _UDPX_H_
6565

66-
6766
extern struct fi_provider udpx_prov;
6867
extern struct util_prov udpx_util_prov;
6968
extern struct fi_info udpx_info;
7069

71-
7270
int udpx_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
7371
void *context);
7472
int udpx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
7573
struct fid_domain **dom, void *context);
7674
int udpx_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
7775
struct fid_eq **eq, void *context);
78-
76+
void udpx_util_prov_init(uint32_t version);
77+
void udpx_util_prov_fin();
7978

8079
#define UDPX_FLAG_MULTI_RECV 1
8180
#define UDPX_IOV_LIMIT 4
81+
#define UDPX_MTU 1500
8282

8383
struct udpx_ep_entry {
8484
void *context;
@@ -88,6 +88,10 @@ struct udpx_ep_entry {
8888
uint8_t resv[sizeof(size_t) - 2];
8989
};
9090

91+
#define UDPX_UDP_HEADER_SIZE 8
92+
#define UDPX_IP_HEADER_SIZE 20
93+
#define UDPX_MAX_MSG_SIZE(mtu) ((mtu) - (UDPX_UDP_HEADER_SIZE + UDPX_IP_HEADER_SIZE))
94+
9195
OFI_DECLARE_CIRQUE(struct udpx_ep_entry, udpx_rx_cirq);
9296

9397
struct udpx_ep;

prov/udp/src/udpx_attr.c

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,15 @@
3131
*/
3232

3333
#include "udpx.h"
34+
#include "ofi_osd.h"
3435

3536
#define UDPX_TX_CAPS (OFI_TX_MSG_CAPS | FI_MULTICAST)
3637
#define UDPX_RX_CAPS (FI_SOURCE | OFI_RX_MSG_CAPS)
3738
#define UDPX_DOMAIN_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM)
3839

3940
struct fi_tx_attr udpx_tx_attr = {
4041
.caps = UDPX_TX_CAPS,
41-
.inject_size = 1472,
42+
.inject_size = UDPX_MAX_MSG_SIZE(UDPX_MTU),
4243
.size = 1024,
4344
.iov_limit = UDPX_IOV_LIMIT
4445
};
@@ -53,7 +54,7 @@ struct fi_ep_attr udpx_ep_attr = {
5354
.type = FI_EP_DGRAM,
5455
.protocol = FI_PROTO_UDP,
5556
.protocol_version = 0,
56-
.max_msg_size = 1472,
57+
.max_msg_size = UDPX_MAX_MSG_SIZE(UDPX_MTU),
5758
.tx_ctx_cnt = 1,
5859
.rx_ctx_cnt = 1
5960
};
@@ -93,6 +94,69 @@ struct fi_info udpx_info = {
9394

9495
struct util_prov udpx_util_prov = {
9596
.prov = &udpx_prov,
96-
.info = &udpx_info,
97-
.flags = 0,
97+
.info = NULL,
98+
.flags = 0,
9899
};
100+
101+
102+
static int match_interface(struct slist_entry *entry, const void *infop)
103+
{
104+
struct ofi_addr_list_entry *addr_entry;
105+
const struct fi_info* info = infop;
106+
107+
addr_entry = container_of(entry, struct ofi_addr_list_entry, entry);
108+
return strcmp(addr_entry->net_name, info->fabric_attr->name) == 0 &&
109+
strcmp(addr_entry->ifa_name, info->domain_attr->name) == 0;
110+
}
111+
112+
static void set_mtu_from_addr_list(struct fi_info* info,
113+
struct slist *addr_list)
114+
{
115+
struct ofi_addr_list_entry *addr_entry;
116+
struct slist_entry *entry;
117+
int max_msg_size;
118+
119+
entry = slist_find_first_match(addr_list, match_interface, info);
120+
if (entry) {
121+
addr_entry = container_of(entry,
122+
struct ofi_addr_list_entry,
123+
entry);
124+
max_msg_size = UDPX_MAX_MSG_SIZE(addr_entry->mtu);
125+
if (max_msg_size > 0) {
126+
info->tx_attr->inject_size = max_msg_size;
127+
info->ep_attr->max_msg_size = max_msg_size;
128+
}
129+
} else {
130+
FI_DBG(&udpx_prov, FI_LOG_CORE,
131+
"Failed to match interface (%s, %s) to "
132+
"address for MTU size\n",
133+
info->fabric_attr->name, info->domain_attr->name);
134+
}
135+
}
136+
137+
void udpx_util_prov_init(uint32_t version)
138+
{
139+
140+
struct slist addr_list;
141+
struct fi_info* cur;
142+
struct fi_info* info;
143+
144+
if (udpx_util_prov.info == NULL) {
145+
udpx_util_prov.info = &udpx_info;
146+
info = fi_allocinfo();
147+
ofi_ip_getinfo(&udpx_util_prov, version, NULL, NULL, 0, NULL,
148+
&info);
149+
slist_init(&addr_list);
150+
ofi_get_list_of_addr(&udpx_prov, "iface", &addr_list);
151+
for (cur = info; cur; cur = cur->next)
152+
set_mtu_from_addr_list(cur, &addr_list);
153+
*(struct fi_info**)&udpx_util_prov.info = info;
154+
ofi_free_list_of_addr(&addr_list);
155+
}
156+
}
157+
158+
void udpx_util_prov_fin()
159+
{
160+
if (udpx_util_prov.info != NULL)
161+
fi_freeinfo(*(struct fi_info**)&udpx_util_prov.info);
162+
}

prov/udp/src/udpx_init.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,25 @@
3737

3838
#include <sys/types.h>
3939

40+
static ofi_mutex_t init_lock;
4041

4142
static int udpx_getinfo(uint32_t version, const char *node, const char *service,
4243
uint64_t flags, const struct fi_info *hints,
4344
struct fi_info **info)
4445
{
45-
return ofi_ip_getinfo(&udpx_util_prov, version, node, service, flags,
46-
hints, info);
46+
ofi_mutex_lock(&init_lock);
47+
udpx_util_prov_init(version);
48+
ofi_mutex_unlock(&init_lock);
49+
return util_getinfo(&udpx_util_prov, version, node, service, flags,
50+
hints, info);
4751
}
4852

4953
static void udpx_fini(void)
5054
{
51-
/* yawn */
55+
udpx_util_prov_fin();
5256
}
5357

58+
5459
struct fi_provider udpx_prov = {
5560
.name = "udp",
5661
.version = OFI_VERSION_DEF_PROV,
@@ -65,5 +70,6 @@ UDP_INI
6570
fi_param_define(&udpx_prov, "iface", FI_PARAM_STRING,
6671
"Specify interface name");
6772

73+
ofi_mutex_init(&init_lock);
6874
return &udpx_prov;
6975
}

src/common.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,7 +1942,8 @@ void ofi_free_list_of_addr(struct slist *addr_list)
19421942
}
19431943

19441944
static inline
1945-
void ofi_insert_loopback_addr(const struct fi_provider *prov, struct slist *addr_list)
1945+
void ofi_insert_loopback_addr(const struct fi_provider *prov,
1946+
struct slist *addr_list, int mtu)
19461947
{
19471948
struct ofi_addr_list_entry *addr_entry;
19481949

@@ -1953,6 +1954,7 @@ void ofi_insert_loopback_addr(const struct fi_provider *prov, struct slist *addr
19531954
addr_entry->comm_caps = FI_LOCAL_COMM;
19541955
addr_entry->ipaddr.sin.sin_family = AF_INET;
19551956
addr_entry->ipaddr.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1957+
addr_entry->mtu = mtu;
19561958
ofi_straddr_log(prov, FI_LOG_INFO, FI_LOG_CORE,
19571959
"available addr: ", &addr_entry->ipaddr);
19581960

@@ -1968,6 +1970,7 @@ void ofi_insert_loopback_addr(const struct fi_provider *prov, struct slist *addr
19681970
addr_entry->comm_caps = FI_LOCAL_COMM;
19691971
addr_entry->ipaddr.sin6.sin6_family = AF_INET6;
19701972
addr_entry->ipaddr.sin6.sin6_addr = in6addr_loopback;
1973+
addr_entry->mtu = mtu;
19711974
ofi_straddr_log(prov, FI_LOG_INFO, FI_LOG_CORE,
19721975
"available addr: ", &addr_entry->ipaddr);
19731976

@@ -2062,7 +2065,7 @@ void ofi_set_netmask_str(char *netstr, size_t len, struct ifaddrs *ifa)
20622065
void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
20632066
struct slist *addr_list)
20642067
{
2065-
int ret;
2068+
int ret, mtu = -1;
20662069
char *iface = NULL;
20672070
struct ofi_addr_list_entry *addr_entry;
20682071
struct ifaddrs *ifaddrs, *ifa;
@@ -2089,10 +2092,13 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
20892092
if (ifa->ifa_addr == NULL ||
20902093
!(ifa->ifa_flags & IFF_UP) ||
20912094
!(ifa->ifa_flags & IFF_RUNNING) ||
2092-
(ifa->ifa_flags & IFF_LOOPBACK) ||
20932095
((ifa->ifa_addr->sa_family != AF_INET) &&
20942096
(ifa->ifa_addr->sa_family != AF_INET6)))
20952097
continue;
2098+
if (ifa->ifa_flags & IFF_LOOPBACK) {
2099+
mtu = ofi_ifaddr_get_mtu(ifa);
2100+
continue;
2101+
}
20962102
if (iface && strncmp(iface, ifa->ifa_name, strlen(iface) + 1)) {
20972103
FI_DBG(prov, FI_LOG_CORE,
20982104
"Skip (%s) interface\n", ifa->ifa_name);
@@ -2122,9 +2128,11 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
21222128
}
21232129

21242130
addr_entry->speed = ofi_ifaddr_get_speed(ifa);
2131+
addr_entry->mtu = ofi_ifaddr_get_mtu(ifa);
21252132
FI_INFO(prov, FI_LOG_CORE, "Available addr: %s, "
2126-
"iface name: %s, speed: %zu\n",
2127-
addr_entry->ipstr, ifa->ifa_name, addr_entry->speed);
2133+
"iface name: %s, speed: %zu, mtu: %d\n",
2134+
addr_entry->ipstr, ifa->ifa_name, addr_entry->speed,
2135+
addr_entry->mtu);
21282136

21292137
slist_insert_before_first_match(addr_list, ofi_compare_addr_entry,
21302138
&addr_entry->entry);
@@ -2136,7 +2144,7 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
21362144
/* Always add loopback address at the end */
21372145
if (!iface || !strncmp(iface, "lo", strlen(iface) + 1) ||
21382146
!strncmp(iface, "loopback", strlen(iface) + 1))
2139-
ofi_insert_loopback_addr(prov, addr_list);
2147+
ofi_insert_loopback_addr(prov, addr_list, mtu);
21402148
}
21412149

21422150
#elif defined HAVE_MIB_IPADDRTABLE
@@ -2182,7 +2190,7 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
21822190
}
21832191

21842192
/* Always add loopback address at the end */
2185-
ofi_insert_loopback_addr(prov, addr_list);
2193+
ofi_insert_loopback_addr(prov, addr_list, -1);
21862194

21872195
out:
21882196
if (iptbl != &_iptbl)
@@ -2194,7 +2202,7 @@ void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
21942202
void ofi_get_list_of_addr(const struct fi_provider *prov, const char *env_name,
21952203
struct slist *addr_list)
21962204
{
2197-
ofi_insert_loopback_addr(prov, addr_list);
2205+
ofi_insert_loopback_addr(prov, addr_list, -1);
21982206
}
21992207
#endif
22002208

0 commit comments

Comments
 (0)