Skip to content
Merged
5 changes: 4 additions & 1 deletion felix/bpf-gpl/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ static CALI_BPF_INLINE __attribute__((noreturn)) void bpf_exit(int rc) {
#define debug_ip(ip) (bpf_htonl((ip).d))
#endif
/* In this branch the fragmentation predicates are compile-time constants:
 * every packet is treated as "don't fragment" and never as a fragment.
 * NOTE(review): presumably this is the IPv6 side of the enclosing
 * conditional, whose opening is not visible here - confirm.
 */
#define ip_is_dnf(ip) (true)
#define ip_is_frag(ip) (false)

#else

Expand All @@ -253,7 +254,9 @@ static CALI_BPF_INLINE __attribute__((noreturn)) void bpf_exit(int rc) {
#endif

/* Predicates over the IPv4 header's frag_off field. The field is kept in
 * network byte order, so the masks are converted with bpf_htons() instead of
 * converting the field. As a consequence the results are only meaningful as
 * zero / non-zero (or compared against another bpf_htons() constant); they are
 * not host-order numbers.
 *
 *   0x4000 - DF (don't fragment) flag
 *   0x2000 - MF (more fragments) flag
 *   0x1fff - fragment offset bits
 *   0x3fff - MF flag together with the fragment offset bits
 */
/* Non-zero when the DF flag is set. */
#define ip_is_dnf(ip) ((ip)->frag_off & bpf_htons(0x4000))
/* Fragment offset bits, still in network byte order. */
#define ip_frag_no(ip) ((ip)->frag_off & bpf_htons(0x1fff))
/* Non-zero when MF is set or the fragment offset is non-zero. */
#define ip_is_frag(ip) ((ip)->frag_off & bpf_htons(0x3fff))
/* True when MF is set and the fragment offset is zero. */
#define ip_is_first_frag(ip) (((ip)->frag_off & bpf_htons(0x3fff)) == bpf_htons(0x2000))
/* True whenever MF is clear. Note that this also holds for packets that are
 * not fragmented at all, so combine with ip_is_frag() if that matters.
 */
#define ip_is_last_frag(ip) (!((ip)->frag_off & bpf_htons(0x2000)))
#endif

#ifndef IP_FMT
Expand Down
6 changes: 5 additions & 1 deletion felix/bpf-gpl/counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include "bpf.h"

/* Number of __u64 slots in a counters_t array. */
#define MAX_COUNTERS_SIZE 22

/* Fixed-size array of 64-bit counters used as a map value. */
typedef __u64 counters_t[MAX_COUNTERS_SIZE];

Expand All @@ -25,6 +25,10 @@ CALI_MAP(cali_counters, 3,
struct counters_key, counters_t, 20000,
0)

/* Per-CPU array map holding a single counters_t entry, keyed by __u32.
 * NOTE(review): presumably a scratch buffer for building counters_t values;
 * its users are not visible in this chunk - confirm.
 */
CALI_MAP(cali_counters_scratch, 2,
BPF_MAP_TYPE_PERCPU_ARRAY,
__u32, counters_t, 1, 0)

static CALI_BPF_INLINE counters_t *counters_get(int ifindex)
{
struct counters_key key = {
Expand Down
52 changes: 40 additions & 12 deletions felix/bpf-gpl/fib_co_re.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,30 @@ static CALI_BPF_INLINE int try_redirect_to_peer(struct cali_tc_ctx *ctx)
return TC_ACT_UNSPEC;
}

/* fib_error_log emits a debug message describing a failed FIB lookup.
 *
 * rc is the value returned by bpf_fib_lookup(): negative values are logged as
 * "bad input", everything else as "FIB problem".
 *
 * This helper only logs. rc is passed by value, so the caller must set its own
 * result after calling it.
 */
static CALI_BPF_INLINE void fib_error_log(struct cali_tc_ctx *ctx, int rc)
{
	if (rc < 0) {
		CALI_DEBUG("FIB lookup failed (bad input): %d.", rc);
	} else {
		CALI_DEBUG("FIB lookup failed (FIB problem): %d.", rc);
	}
}

static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)
{
int rc = ctx->fwd.res;
enum calico_reason reason = ctx->fwd.reason;
struct cali_tc_state *state = ctx->state;

if (rc == TC_ACT_SHOT) {
goto deny;
}

if (ctx->state->flags & CALI_ST_SKIP_REDIR_ONCE) {
goto skip_fib;
}

if (rc == CALI_RES_REDIR_BACK) {
int redir_flags = 0;
if (CALI_F_FROM_HOST) {
Expand Down Expand Up @@ -318,6 +332,27 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)
rc = bpf_fib_lookup(ctx->skb, fib_params(ctx), sizeof(struct bpf_fib_lookup),
ctx->fwd.fib_flags | BPF_FIB_LOOKUP_SKIP_NEIGH);
switch (rc) {
case BPF_FIB_LKUP_RET_FRAG_NEEDED:
/* We are not asking for an MTU check, but we may still get
* BPF_FIB_LKUP_RET_FRAG_NEEDED if the device is not yet UP
* despite the mtu being larger than the packet
* https://github.com/torvalds/linux/blob/3349ada3cffdbe4579872a004360daa31938f683/include/linux/netdevice.h#L4242
* This happens on wireguard device in FV test, but the device accepts
* forwarded packets. It should be just a start up issue and not a
* real issue in production.
*
* The irony is that if we did ask for MTU check, we would not get
* BPF_FIB_LKUP_RET_NO_NEIGH and all would proceed as expected.
* But we do not want to ask for an MTU check for various reasons
* related to us growing the packets ourselves.
* https://github.com/projectcalico/calico/commit/78c85f96b2aa4ae76acfaa04bb8823c2ad76f9bd
*/
CALI_DEBUG("mtu_result %d dev %d", fib_params(ctx)->mtu_result, fib_params(ctx)->ifindex);
if (!skb_is_gso(ctx->skb) && fib_params(ctx)->mtu_result < bpf_htons(ctx->state->ip_size)) {
fib_error_log(ctx, rc);
rc = TC_ACT_UNSPEC;
break;
}
case 0:
case BPF_FIB_LKUP_RET_NO_NEIGH:
#ifdef IPVER6
Expand All @@ -327,7 +362,7 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)
#endif

if (!fib_approve(ctx, fib_params(ctx)->ifindex)) {
reason = CALI_REASON_WEP_NOT_READY;
ctx->fwd.reason = CALI_REASON_WEP_NOT_READY;
goto deny;
}

Expand All @@ -344,15 +379,8 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)
rc = bpf_redirect_neigh(fib_params(ctx)->ifindex, &nh_params, sizeof(nh_params), 0);
break;
default:
if (rc < 0) {
CALI_DEBUG("FIB lookup failed (bad input): %d.", rc);
rc = TC_ACT_UNSPEC;
} else {
CALI_DEBUG("FIB lookup failed (FIB problem): %d.", rc);
rc = TC_ACT_UNSPEC;
}

break;
fib_error_log(ctx, rc);
rc = TC_ACT_UNSPEC;
}

no_fib_redirect:
Expand Down Expand Up @@ -484,7 +512,7 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)

if (rc == TC_ACT_SHOT) {
CALI_INFO("Final result=DENY (%d). Program execution time: %lluns",
reason, prog_end_time-state->prog_start_time);
ctx->fwd.reason, prog_end_time-state->prog_start_time);
} else {
if (CALI_F_VXLAN && CALI_F_TO_HOST) {
bpf_skb_change_type(ctx->skb, PACKET_HOST);
Expand Down
261 changes: 261 additions & 0 deletions felix/bpf-gpl/ip_v4_fragment.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
// Project Calico BPF dataplane programs.
// Copyright (c) 2020-2022 Tigera, Inc. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

#ifndef __CALI_IP_V4_FRAGMENT_H__
#define __CALI_IP_V4_FRAGMENT_H__

#include "ip_addr.h"

/* Key identifying one stored piece of a fragmented IPv4 datagram in the
 * cali_v4_frags map.
 */
struct frags4_key {
/* Source address, copied from the IP header's saddr. */
ipv4_addr_t src;
/* Destination address, copied from the IP header's daddr. */
ipv4_addr_t dst;
/* IP identification field, stored as found in the header (no byte swap). */
__u16 id;
/* Byte offset of this piece within the datagram's payload. Reassembly starts
 * at 0 and advances by the stored length of each piece.
 */
__u16 offset;
};

/* Maximum number of payload bytes stored in a single map entry. */
#define MAX_FRAG 1504 /* requires multiple of 8 */

/* One stored piece of a fragmented datagram's payload. */
struct frags4_value {
/* Set when further pieces follow this one in the datagram. */
__u16 more_frags:1;
/* Number of valid bytes in data. */
__u16 len;
/* NOTE(review): presumably explicit padding so that data starts at an
 * 8-byte offset - confirm.
 */
__u32 __pad;
/* Payload bytes; only the first len bytes are meaningful. */
char data[MAX_FRAG];
};

/* LRU hash of stored fragment pieces: frags4_key -> frags4_value, sized for
 * 10000 entries.
 */
CALI_MAP(cali_v4_frags, 2, BPF_MAP_TYPE_LRU_HASH, struct frags4_key, struct frags4_value, 10000, 0)

/* Per-CPU array with a single frags4_value entry, used as a scratch buffer
 * (see frags4_get_scratch()).
 * NOTE(review): presumably needed because frags4_value is too large for the
 * BPF stack - confirm.
 */
CALI_MAP(cali_v4_frgtmp, 2,
BPF_MAP_TYPE_PERCPU_ARRAY,
__u32, struct frags4_value,
1, 0)

CALI_MAP(cali_v4_frgfwd, 2, BPF_MAP_TYPE_LRU_HASH, struct frags4_fwd_key, __u32, 10000, 0)

struct frags4_fwd_key {
ipv4_addr_t src;
ipv4_addr_t dst;
__u32 ifindex; /* The stream of fragments may be crossing multiple devices */
__u16 id;
__u16 __pad;
};

/* Returns the single entry of the per-CPU cali_v4_frgtmp map, or NULL when the
 * lookup fails.
 */
static CALI_BPF_INLINE struct frags4_value *frags4_get_scratch()
{
	__u32 slot = 0;
	struct frags4_value *scratch = cali_v4_frgtmp_lookup_elem(&slot);

	return scratch;
}

/* frags4_try_assemble tries to rebuild the datagram the current packet belongs
 * to from the pieces stored in cali_v4_frags.
 *
 * The pieces are looked up by (saddr, daddr, id) of the current packet,
 * starting at byte offset 0 and advancing by each piece's stored length. At
 * most 10 pieces are followed.
 *
 * Pass 1 only checks that an unbroken chain ending in a piece without
 * more_frags exists and sums the lengths. Pass 2 resizes the skb, copies each
 * piece behind the L4 header offset and deletes it from the map. Finally the
 * IP header is rewritten as an unfragmented packet and its checksum is
 * recomputed.
 *
 * Returns true when the packet in ctx->skb is the reassembled datagram and has
 * been reparsed; false otherwise. On a failure during pass 2 the skb may
 * already have been resized and some pieces may already have been removed from
 * the map.
 */
static CALI_BPF_INLINE bool frags4_try_assemble(struct cali_tc_ctx *ctx)
{
	/* k.offset is zero-initialized: the walk starts at the first piece. */
	struct frags4_key k = {
		.src = ip_hdr(ctx)->saddr,
		.dst = ip_hdr(ctx)->daddr,
		.id = ip_hdr(ctx)->id,
	};

	int i, tot_len = 0;

	/* Pass 1: make sure every piece up to the last one is present. */
	for (i = 0; i < 10; i++) {
		struct frags4_value *v = cali_v4_frags_lookup_elem(&k);

		if (!v) {
			CALI_DEBUG("Missing IP fragment at offset %d", k.offset);
			return false;
		}

		tot_len += v->len;

		if(!v->more_frags) {
			goto assemble;
		}

		k.offset += v->len;
	}

	/* Chain is longer than we are willing to follow. */
	return false;

assemble:
	CALI_DEBUG("IP FRAG: Found all fragments!");

	int off = skb_l4hdr_offset(ctx);
	int err = bpf_skb_change_tail(ctx->skb, off + tot_len, 0);
	if (err) {
		CALI_DEBUG("IP FRAG: bpf_skb_change_tail (len=%d) failed (err=%d)", tot_len, err);
		goto out;
	}

	k.offset = 0;

	/* Pass 2: copy the pieces into the skb and release them. */
	for (i = 0; i < 10; i++) {
		struct frags4_value *v = cali_v4_frags_lookup_elem(&k);

		if (!v) {
			CALI_DEBUG("IP FRAG: Missing IP fragment at offset %d", k.offset);
			goto out;
		}

		__u16 len = v->len;
		if (len == 0 || len > MAX_FRAG) {
			goto out;
		}
		CALI_DEBUG("IP FRAG: copy %d bytes to %d", len, off);
		if (bpf_skb_store_bytes(ctx->skb, off, v->data, len, 0)) {
			CALI_DEBUG("IP FRAG: Failed to copy bytes");
			goto out;
		}

		bool last = !v->more_frags;
		cali_v4_frags_delete_elem(&k);

		if(last) {
			break;
		}

		/* Do not touch *v past this point: the entry has been deleted and
		 * its storage may be reused. Advance by the length captured (and
		 * range-checked) above instead.
		 */
		k.offset += len;
		off += len;
	}

	/* The skb was resized, so packet pointers must be re-established. */
	if (parse_packet_ip(ctx) != PARSING_OK) {
		goto out;
	}

	/* recalculate IP csum of the restored IP header */
	ip_hdr(ctx)->check = 0;
	ip_hdr(ctx)->frag_off = 0;
	ip_hdr(ctx)->tot_len = bpf_htons(ip_hdr(ctx)->ihl*4 + tot_len);

	/* The sum covers sizeof(struct iphdr) bytes of the header. */
	__wsum ip_csum = bpf_csum_diff(0, 0, (__u32 *)ctx->ip_header, sizeof(struct iphdr), 0);
	int ret = bpf_l3_csum_replace(ctx->skb, skb_iphdr_offset(ctx) + offsetof(struct iphdr, check), 0, ip_csum, 0);
	if (ret) {
		CALI_DEBUG("IP FRAG: set L3 csum failed");
		goto out;
	}

	/* No need to recalculate L4 csum as the concatenated data should be intact. In
	 * case of TCP/UDP, the pseudo IP header used to calculate the checksum does not
	 * change src/dst IP, protocol and UDP/TCP length stay the same.
	 */

	if (parse_packet_ip(ctx) != PARSING_OK) {
		goto out;
	}

	return true;
out:
	return false;
}

/* frags4_handle stores the payload of the current IPv4 fragment in the
 * cali_v4_frags map and then tries to reassemble the whole datagram.
 *
 * The payload (everything from the L4 header offset to the end of the skb) is
 * copied through the per-CPU scratch value in pieces of at most MAX_FRAG
 * bytes; each piece is keyed by its byte offset within the datagram. At most
 * 10 pieces are stored per packet.
 *
 * Returns true only when this fragment completed the datagram and ctx->skb now
 * holds the reassembled packet (see frags4_try_assemble()). Returns false when
 * more fragments are still needed or on any error.
 */
static CALI_BPF_INLINE bool frags4_handle(struct cali_tc_ctx *ctx)
{
	struct frags4_value *v = frags4_get_scratch();

	if (!v) {
		goto out;
	}

	struct frags4_key k = {
		.src = ip_hdr(ctx)->saddr,
		.dst = ip_hdr(ctx)->daddr,
		.id = ip_hdr(ctx)->id,
		/* The low 13 bits of frag_off hold the offset in 8-byte units.
		 * Mask off the flag bits first, then scale to bytes; masking
		 * after the multiplication would truncate large offsets.
		 */
		.offset = 8 * (bpf_ntohs(ip_hdr(ctx)->frag_off) & 0x1fff),
	};

	int i;
	int r_off = skb_l4hdr_offset(ctx);
	bool more_frags = bpf_ntohs(ip_hdr(ctx)->frag_off) & 0x2000;

	/* When we get a fragment, it may be larger than the storage in the map.
	 * We may need to break it into multiple fragments to be able to store
	 * it.
	 */
	for (i = 0; i < 10; i++) {
		/* Work out how much to store in this piece: whatever is left in
		 * the skb past r_off, capped at MAX_FRAG. The explicit clamps keep
		 * the size provably within (0, MAX_FRAG] for the copy below.
		 */
		int sz = MAX_FRAG;
		if (r_off + sz >= ctx->skb->len) {
			sz = ctx->skb->len - r_off;
		}
		if (sz > MAX_FRAG) {
			sz = MAX_FRAG;
		}
		if (sz <= 0) {
			goto out;
		}

		if (bpf_skb_load_bytes(ctx->skb, r_off, v->data, sz)) {
			CALI_DEBUG("IP FRAG: failed to read data");
			goto out;
		}
		v->len = (__u16)sz;
		/* A piece is followed by more data if the IP header says so or if
		 * this packet itself still has bytes left to store.
		 */
		v->more_frags = more_frags || r_off + sz < ctx->skb->len;
		CALI_DEBUG("IP FRAG: frg off %d", k.offset);
		CALI_DEBUG("IP FRAG: frg size %d r_off %d", sz, r_off);

		if (cali_v4_frags_update_elem(&k, v, 0)) {
			CALI_DEBUG("IP FRAG: Failed to save IP fragment.");
			goto out;
		}

		r_off += sz;
		k.offset += sz;
		if (r_off >= ctx->skb->len) {
			break;
		}
	}

	if (!frags4_try_assemble(ctx)) {
		goto out;
	}


	return true;

out:
	return false;
}

static CALI_BPF_INLINE void frags4_record_ct(struct cali_tc_ctx *ctx)
{
struct frags4_fwd_key k = {
.src = ip_hdr(ctx)->saddr,
.dst = ip_hdr(ctx)->daddr,
.ifindex = ctx->skb->ifindex,
.id = ip_hdr(ctx)->id,
};

__u32 v = 0;

cali_v4_frgfwd_update_elem(&k, &v, 0);
CALI_DEBUG("IP FRAG: created ct from " IP_FMT " to " IP_FMT,
debug_ip(ctx->state->ip_src), debug_ip(ctx->state->ip_dst));
}

static CALI_BPF_INLINE void frags4_remove_ct(struct cali_tc_ctx *ctx)
{
struct frags4_fwd_key k = {
.src = ip_hdr(ctx)->saddr,
.dst = ip_hdr(ctx)->daddr,
.ifindex = ctx->skb->ifindex,
.id = ip_hdr(ctx)->id,
};

cali_v4_frgfwd_delete_elem(&k);
CALI_DEBUG("IP FRAG: killed ct from " IP_FMT " to " IP_FMT,
debug_ip(ctx->state->ip_src), debug_ip(ctx->state->ip_dst));
}

/* frags4_lookup_ct reports whether cali_v4_frgfwd holds an entry for the
 * current packet's (saddr, daddr, id) on the device the skb is on.
 */
static CALI_BPF_INLINE bool frags4_lookup_ct(struct cali_tc_ctx *ctx)
{
	struct frags4_fwd_key fwd_key = {
		.src = ip_hdr(ctx)->saddr,
		.dst = ip_hdr(ctx)->daddr,
		.ifindex = ctx->skb->ifindex,
		.id = ip_hdr(ctx)->id,
	};

	CALI_DEBUG("IP FRAG: lookup ct from " IP_FMT " to " IP_FMT,
			debug_ip(ctx->state->ip_src), debug_ip(ctx->state->ip_dst));

	if (cali_v4_frgfwd_lookup_elem(&fwd_key) != NULL) {
		return true;
	}
	return false;
}

#endif /* __CALI_IP_V4_FRAGMENT_H__ */
Loading