1414#include <ucs/type/class.h>
1515#include <ucs/datastruct/queue.h>
1616#include <sys/poll.h>
17+ #include <linux/ip.h>
1718
1819
/* Byte lengths of raw IPv4 and IPv6 addresses */
#define UCT_UD_IPV4_ADDR_LEN    (sizeof(struct in_addr))
#define UCT_UD_IPV6_ADDR_LEN    (sizeof(struct in6_addr))
22+
#if ENABLE_STATS
/* Statistics class of the UD interface. Counter names are indexed by the
 * UCT_UD_IFACE_STAT_* constants; UCT_UD_IFACE_STAT_LAST is the counter count. */
static ucs_stats_class_t uct_ud_iface_stats_class = {
    .name          = "ud_iface",
    .num_counters  = UCT_UD_IFACE_STAT_LAST,
    .counter_names = {
        [UCT_UD_IFACE_STAT_RX_DROP] = "rx_drop"
    }
};
#endif /* ENABLE_STATS */
32+
1933SGLIB_DEFINE_LIST_FUNCTIONS (uct_ud_iface_peer_t , uct_ud_iface_peer_cmp , next )
2034SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS (uct_ud_iface_peer_t ,
2135 UCT_UD_HASH_SIZE ,
@@ -387,6 +401,39 @@ void uct_ud_iface_remove_async_handlers(uct_ud_iface_t *iface)
387401 ucs_async_remove_handler (iface -> async .timer_id , 1 );
388402}
389403
404+ /* Calculate real GIDs len. Can be either 16 (RoCEv1 or RoCEv2/IPv6)
405+ * or 4 (RoCEv2/IPv4). This len is used for packets filtering by DGIDs.
406+ *
407+ * According to Annex17_RoCEv2 (A17.4.5.2):
408+ * "The first 40 bytes of user posted UD Receive Buffers are reserved for the L3
409+ * header of the incoming packet (as per the InfiniBand Spec Section 11.4.1.2).
410+ * In RoCEv2, this area is filled up with the IP header. IPv6 header uses the
411+ * entire 40 bytes. IPv4 headers use the 20 bytes in the second half of the
412+ * reserved 40 bytes area (i.e. offset 20 from the beginning of the receive
413+ * buffer). In this case, the content of the first 20 bytes is undefined." */
414+ static void uct_ud_iface_calc_gid_len (uct_ud_iface_t * iface )
415+ {
416+ uint16_t * local_gid_u16 = (uint16_t * )iface -> super .gid .raw ;
417+
418+ /* Make sure that daddr in IPv4 resides in the last 4 bytes in GRH */
419+ UCS_STATIC_ASSERT ((UCT_IB_GRH_LEN - (20 + offsetof(struct iphdr , daddr ))) ==
420+ UCT_UD_IPV4_ADDR_LEN );
421+
422+ /* Make sure that dgid resides in the last 16 bytes in GRH */
423+ UCS_STATIC_ASSERT ((UCT_IB_GRH_LEN - offsetof(struct ibv_grh , dgid )) ==
424+ UCT_UD_IPV6_ADDR_LEN );
425+
426+ /* IPv4 mapped to IPv6 looks like: 0000:0000:0000:0000:0000:ffff:????:????,
427+ * so check for leading zeroes and verify that 11-12 bytes are 0xff.
428+ * Otherwise either RoCEv1 or RoCEv2/IPv6 are used. */
429+ if (local_gid_u16 [0 ] == 0x0000 ) {
430+ ucs_assert_always (local_gid_u16 [5 ] == 0xffff );
431+ iface -> config .gid_len = UCT_UD_IPV4_ADDR_LEN ;
432+ } else {
433+ iface -> config .gid_len = UCT_UD_IPV6_ADDR_LEN ;
434+ }
435+ }
436+
390437UCS_CLASS_INIT_FUNC (uct_ud_iface_t , uct_ud_iface_ops_t * ops , uct_md_h md ,
391438 uct_worker_h worker , const uct_iface_params_t * params ,
392439 unsigned ud_rx_priv_len ,
@@ -434,6 +481,8 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
434481 self -> rx .available = config -> super .rx .queue_len ;
435482 self -> config .tx_qp_len = config -> super .tx .queue_len ;
436483 self -> config .peer_timeout = ucs_time_from_sec (config -> peer_timeout );
484+ self -> config .check_grh_dgid = (config -> dgid_check &&
485+ (self -> super .addr_type == UCT_IB_ADDRESS_TYPE_ETH ));
437486
438487 if (config -> slow_timer_backoff <= 0. ) {
439488 ucs_error ("The slow timer back off should be > 0 (%lf)" ,
@@ -472,7 +521,7 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
472521 & config -> super .tx .mp , self -> config .tx_qp_len ,
473522 uct_ud_iface_send_skb_init , "ud_tx_skb" );
474523 if (status != UCS_OK ) {
475- goto err_mpool ;
524+ goto err_rx_mpool ;
476525 }
477526 self -> tx .skb = ucs_mpool_get (& self -> tx .mp );
478527 self -> tx .skb_inl .super .len = sizeof (uct_ud_neth_t );
@@ -485,9 +534,19 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
485534
486535 ucs_queue_head_init (& self -> rx .pending_q );
487536
537+ uct_ud_iface_calc_gid_len (self );
538+
539+ status = UCS_STATS_NODE_ALLOC (& self -> stats , & uct_ud_iface_stats_class ,
540+ self -> super .super .stats );
541+ if (status != UCS_OK ) {
542+ goto err_tx_mpool ;
543+ }
544+
488545 return UCS_OK ;
489546
490- err_mpool :
547+ err_tx_mpool :
548+ ucs_mpool_cleanup (& self -> tx .mp , 1 );
549+ err_rx_mpool :
491550 ucs_mpool_cleanup (& self -> rx .mp , 1 );
492551err_qp :
493552 ibv_destroy_qp (self -> qp );
@@ -514,6 +573,7 @@ static UCS_CLASS_CLEANUP_FUNC(uct_ud_iface_t)
514573 ucs_ptr_array_cleanup (& self -> eps );
515574 ucs_arbiter_cleanup (& self -> tx .pending_q );
516575 ucs_assert (self -> tx .pending_q_len == 0 );
576+ UCS_STATS_NODE_FREE (self -> stats );
517577 uct_ud_leave (self );
518578}
519579
@@ -527,6 +587,10 @@ ucs_config_field_t uct_ud_iface_config_table[] = {
527587 {"SLOW_TIMER_BACKOFF" , "2.0" , "Timeout multiplier for resending trigger" ,
528588 ucs_offsetof (uct_ud_iface_config_t , slow_timer_backoff ),
529589 UCS_CONFIG_TYPE_DOUBLE },
590+ {"ETH_DGID_CHECK" , "y" ,
591+ "Enable checking destination GID for incoming packets of Ethernet network\n"
592+ "Mismatched packets are silently dropped." ,
593+ ucs_offsetof (uct_ud_iface_config_t , dgid_check ), UCS_CONFIG_TYPE_BOOL },
530594 {NULL }
531595};
532596
0 commit comments