Skip to content

Commit 46bd382

Browse files
authored
Merge pull request #1992 from brminich/topic/backport_to_v1.2.x
UCT/UD: Backport DGID packet filtering
2 parents 68620d7 + 0e471cb commit 46bd382

File tree

5 files changed

+128
-2
lines changed

5 files changed

+128
-2
lines changed

contrib/test_jenkins.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,17 @@ build_icc() {
218218
fi
219219
}
220220

221+
#
# Build debug version
#
# Configures the tree with --enable-debug using $ucx_inst as the install
# prefix, then runs the clean, default and distclean make targets in that
# order.
#
build_debug() {
	echo "==== Build with --enable-debug option ===="
	# Quote the prefix so an install path containing whitespace or glob
	# characters is passed to configure as a single argument.
	../contrib/configure-devel --prefix="$ucx_inst" --enable-debug
	# NOTE(review): $MAKE is left unquoted on purpose - presumably it may carry
	# extra arguments (e.g. a -j option); confirm against where it is set.
	$MAKE clean
	$MAKE
	$MAKE distclean
}
231+
221232
run_hello() {
222233
api=$1
223234
shift
@@ -483,6 +494,7 @@ run_tests() {
483494
export UCX_ERROR_MAIL_FOOTER=$JOB_URL/$BUILD_NUMBER/console
484495

485496
do_distributed_task 0 4 build_icc
497+
do_distributed_task 1 4 build_debug
486498

487499
# all are running mpi tests
488500
run_mpi_tests

src/uct/ib/ud/accel/ud_mlx5.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,14 @@ ucs_status_t uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface, int is_async)
402402

403403
len = ntohl(cqe->byte_cnt);
404404
VALGRIND_MAKE_MEM_DEFINED(packet, len);
405+
406+
if (!uct_ud_iface_check_grh(&iface->super, packet + UCT_IB_GRH_LEN,
407+
(ntohl(cqe->flags_rqpn) >> 28) & 3)) {
408+
ucs_mpool_put_inline(desc);
409+
status = UCS_ERR_NO_PROGRESS;
410+
goto out;
411+
}
412+
405413
uct_ib_mlx5_log_rx(&iface->super.super, IBV_QPT_UD, cqe, packet,
406414
uct_ud_dump_packet);
407415
uct_ud_ep_process_rx(&iface->super,

src/uct/ib/ud/base/ud_iface.c

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,22 @@
1414
#include <ucs/type/class.h>
1515
#include <ucs/datastruct/queue.h>
1616
#include <sys/poll.h>
17+
#include <linux/ip.h>
1718

1819

20+
#define UCT_UD_IPV4_ADDR_LEN sizeof(struct in_addr)
21+
#define UCT_UD_IPV6_ADDR_LEN sizeof(struct in6_addr)
22+
23+
/* Statistics class of the UD interface, compiled only when ENABLE_STATS is
 * set. It declares UCT_UD_IFACE_STAT_LAST counters; the name of each counter
 * is indexed by its UCT_UD_IFACE_STAT_* value. */
#if ENABLE_STATS
24+
static ucs_stats_class_t uct_ud_iface_stats_class = {
25+
.name = "ud_iface",
26+
.num_counters = UCT_UD_IFACE_STAT_LAST,
27+
.counter_names = {
28+
/* Updated when a received packet is dropped by the DGID check */
[UCT_UD_IFACE_STAT_RX_DROP] = "rx_drop"
29+
}
30+
};
31+
#endif
32+
1933
SGLIB_DEFINE_LIST_FUNCTIONS(uct_ud_iface_peer_t, uct_ud_iface_peer_cmp, next)
2034
SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(uct_ud_iface_peer_t,
2135
UCT_UD_HASH_SIZE,
@@ -387,6 +401,39 @@ void uct_ud_iface_remove_async_handlers(uct_ud_iface_t *iface)
387401
ucs_async_remove_handler(iface->async.timer_id, 1);
388402
}
389403

404+
/* Calculate real GIDs len. Can be either 16 (RoCEv1 or RoCEv2/IPv6)
405+
* or 4 (RoCEv2/IPv4). This len is used for packets filtering by DGIDs.
406+
*
407+
* According to Annex17_RoCEv2 (A17.4.5.2):
408+
* "The first 40 bytes of user posted UD Receive Buffers are reserved for the L3
409+
* header of the incoming packet (as per the InfiniBand Spec Section 11.4.1.2).
410+
* In RoCEv2, this area is filled up with the IP header. IPv6 header uses the
411+
* entire 40 bytes. IPv4 headers use the 20 bytes in the second half of the
412+
* reserved 40 bytes area (i.e. offset 20 from the beginning of the receive
413+
* buffer). In this case, the content of the first 20 bytes is undefined." */
414+
static void uct_ud_iface_calc_gid_len(uct_ud_iface_t *iface)
415+
{
416+
uint16_t *local_gid_u16 = (uint16_t*)iface->super.gid.raw;
417+
418+
/* Make sure that daddr in IPv4 resides in the last 4 bytes in GRH */
419+
UCS_STATIC_ASSERT((UCT_IB_GRH_LEN - (20 + offsetof(struct iphdr, daddr))) ==
420+
UCT_UD_IPV4_ADDR_LEN);
421+
422+
/* Make sure that dgid resides in the last 16 bytes in GRH */
423+
UCS_STATIC_ASSERT((UCT_IB_GRH_LEN - offsetof(struct ibv_grh, dgid)) ==
424+
UCT_UD_IPV6_ADDR_LEN);
425+
426+
/* IPv4 mapped to IPv6 looks like: 0000:0000:0000:0000:0000:ffff:????:????,
427+
* so check for leading zeroes and verify that 11-12 bytes are 0xff.
428+
* Otherwise either RoCEv1 or RoCEv2/IPv6 are used. */
429+
if (local_gid_u16[0] == 0x0000) {
430+
ucs_assert_always(local_gid_u16[5] == 0xffff);
431+
iface->config.gid_len = UCT_UD_IPV4_ADDR_LEN;
432+
} else {
433+
iface->config.gid_len = UCT_UD_IPV6_ADDR_LEN;
434+
}
435+
}
436+
390437
UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
391438
uct_worker_h worker, const uct_iface_params_t *params,
392439
unsigned ud_rx_priv_len,
@@ -434,6 +481,8 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
434481
self->rx.available = config->super.rx.queue_len;
435482
self->config.tx_qp_len = config->super.tx.queue_len;
436483
self->config.peer_timeout = ucs_time_from_sec(config->peer_timeout);
484+
self->config.check_grh_dgid = (config->dgid_check &&
485+
(self->super.addr_type == UCT_IB_ADDRESS_TYPE_ETH));
437486

438487
if (config->slow_timer_backoff <= 0.) {
439488
ucs_error("The slow timer back off should be > 0 (%lf)",
@@ -472,7 +521,7 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
472521
&config->super.tx.mp, self->config.tx_qp_len,
473522
uct_ud_iface_send_skb_init, "ud_tx_skb");
474523
if (status != UCS_OK) {
475-
goto err_mpool;
524+
goto err_rx_mpool;
476525
}
477526
self->tx.skb = ucs_mpool_get(&self->tx.mp);
478527
self->tx.skb_inl.super.len = sizeof(uct_ud_neth_t);
@@ -485,9 +534,19 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
485534

486535
ucs_queue_head_init(&self->rx.pending_q);
487536

537+
uct_ud_iface_calc_gid_len(self);
538+
539+
status = UCS_STATS_NODE_ALLOC(&self->stats, &uct_ud_iface_stats_class,
540+
self->super.super.stats);
541+
if (status != UCS_OK) {
542+
goto err_tx_mpool;
543+
}
544+
488545
return UCS_OK;
489546

490-
err_mpool:
547+
err_tx_mpool:
548+
ucs_mpool_cleanup(&self->tx.mp, 1);
549+
err_rx_mpool:
491550
ucs_mpool_cleanup(&self->rx.mp, 1);
492551
err_qp:
493552
ibv_destroy_qp(self->qp);
@@ -514,6 +573,7 @@ static UCS_CLASS_CLEANUP_FUNC(uct_ud_iface_t)
514573
ucs_ptr_array_cleanup(&self->eps);
515574
ucs_arbiter_cleanup(&self->tx.pending_q);
516575
ucs_assert(self->tx.pending_q_len == 0);
576+
UCS_STATS_NODE_FREE(self->stats);
517577
uct_ud_leave(self);
518578
}
519579

@@ -527,6 +587,10 @@ ucs_config_field_t uct_ud_iface_config_table[] = {
527587
{"SLOW_TIMER_BACKOFF", "2.0", "Timeout multiplier for resending trigger",
528588
ucs_offsetof(uct_ud_iface_config_t, slow_timer_backoff),
529589
UCS_CONFIG_TYPE_DOUBLE},
590+
{"ETH_DGID_CHECK", "y",
591+
"Enable checking destination GID for incoming packets of Ethernet network\n"
592+
"Mismatched packets are silently dropped.",
593+
ucs_offsetof(uct_ud_iface_config_t, dgid_check), UCS_CONFIG_TYPE_BOOL},
530594
{NULL}
531595
};
532596

src/uct/ib/ud/base/ud_iface.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,18 @@
2323

2424
#define UCT_UD_MIN_INLINE 48
2525

26+
/* Indices of the UD interface statistics counters */
enum {
27+
UCT_UD_IFACE_STAT_RX_DROP, /* updated when a received packet fails the DGID check */
28+
UCT_UD_IFACE_STAT_LAST     /* number of counters; keep as the last entry */
29+
};
30+
2631
/* TODO: maybe tx_moderation can be defined at compile time, since tx completions are used only to know how much space there is in the tx qp */
2732

2833
/* User-visible configuration of a UD interface; extends the common IB
 * interface configuration. */
typedef struct uct_ud_iface_config {
2934
uct_ib_iface_config_t super;
3035
/* Peer timeout; converted with ucs_time_from_sec() at interface init */
double peer_timeout;
3136
/* Timeout multiplier for resending trigger (SLOW_TIMER_BACKOFF); must be > 0 */
double slow_timer_backoff;
37+
/* ETH_DGID_CHECK: nonzero to check the destination GID of incoming packets
 * on Ethernet networks */
int dgid_check;
3238
} uct_ud_iface_config_t;
3339

3440
struct uct_ud_iface_peer {
@@ -123,7 +129,12 @@ struct uct_ud_iface {
123129
double slow_timer_backoff;
124130
unsigned tx_qp_len;
125131
unsigned max_inline;
132+
int check_grh_dgid;
133+
unsigned gid_len;
126134
} config;
135+
136+
UCS_STATS_NODE_DECLARE(stats);
137+
127138
ucs_ptr_array_t eps;
128139
uct_ud_iface_peer_t *peers[UCT_UD_HASH_SIZE];
129140
struct {
@@ -215,6 +226,32 @@ static UCS_F_ALWAYS_INLINE void uct_ud_leave(uct_ud_iface_t *iface)
215226
UCS_ASYNC_UNBLOCK(iface->super.super.worker->async);
216227
}
217228

229+
static UCS_F_ALWAYS_INLINE int
230+
uct_ud_iface_check_grh(uct_ud_iface_t *iface, void *grh_end, int is_grh_present)
231+
{
232+
void *dest_gid, *local_gid;
233+
234+
if (!iface->config.check_grh_dgid) {
235+
return 1;
236+
}
237+
238+
if (ucs_unlikely(!is_grh_present)) {
239+
ucs_warn("RoCE packet does not contain GRH");
240+
return 1;
241+
}
242+
243+
local_gid = (char*)iface->super.gid.raw + (16 - iface->config.gid_len);
244+
dest_gid = (char*)grh_end - iface->config.gid_len;
245+
246+
if (memcmp(local_gid, dest_gid, iface->config.gid_len)) {
247+
UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_UD_IFACE_STAT_RX_DROP, 1);
248+
ucs_trace_data("Drop packet with wrong dgid");
249+
return 0;
250+
}
251+
252+
return 1;
253+
}
254+
218255
/*
219256
management of connecting endpoints (cep)
220257

src/uct/ib/ud/verbs/ud_verbs.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,11 @@ uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface, int is_async)
323323
}
324324

325325
UCT_IB_IFACE_VERBS_FOREACH_RXWQE(&iface->super.super, i, packet, wc, num_wcs) {
326+
if (!uct_ud_iface_check_grh(&iface->super, packet + UCT_IB_GRH_LEN,
327+
wc[i].wc_flags & IBV_WC_GRH)) {
328+
ucs_mpool_put_inline((void*)wc[i].wr_id);
329+
continue;
330+
}
326331
uct_ib_log_recv_completion(&iface->super.super, IBV_QPT_UD, &wc[i],
327332
packet, wc[i].byte_len, uct_ud_dump_packet);
328333
uct_ud_ep_process_rx(&iface->super,

0 commit comments

Comments
 (0)