Skip to content

Commit bb0d8a3

Browse files
committed
[BPF] handle fragmentation over vxlan
Forwarding the packet directly would create a fragmented VXLAN packet. Instead, let the packet be fragmented first and then route the fragments into VXLAN, which is easier to handle.
1 parent 71cc07f commit bb0d8a3

File tree

4 files changed

+28
-5
lines changed

4 files changed

+28
-5
lines changed

felix/bpf-gpl/fib_co_re.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)
3737
goto deny;
3838
}
3939

40+
if (ctx->state->flags & CALI_ST_SKIP_REDIR_ONCE) {
41+
goto skip_fib;
42+
}
43+
4044
if (rc == CALI_RES_REDIR_BACK) {
4145
int redir_flags = 0;
4246
if (CALI_F_FROM_HOST) {
@@ -174,7 +178,9 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)
174178
}
175179
}
176180
} else if (CALI_F_VXLAN && CALI_F_TO_HEP) {
177-
if (!(ctx->skb->mark & CALI_SKB_MARK_SEEN) || (ctx->fwd.mark & CALI_SKB_MARK_FROM_NAT_IFACE_OUT)) {
181+
if (!(ctx->skb->mark & CALI_SKB_MARK_SEEN) ||
182+
ctx->state->flags & CALI_ST_IS_FRAG || /* frags go through host and don't have key set yet */
183+
(ctx->fwd.mark & CALI_SKB_MARK_FROM_NAT_IFACE_OUT) == CALI_SKB_MARK_FROM_NAT_IFACE_OUT) {
178184
/* packet to vxlan from the host, needs to set tunnel key. Either
179185
* it wasn't seen or it was routed via the bpfnat device because
180186
* its destination was a service and CTLB is disabled

felix/bpf-gpl/tc.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ int calico_tc_main(struct __sk_buff *skb)
207207
if (!frags4_handle(ctx)) {
208208
goto deny;
209209
}
210+
/* force it through stack to trigger any further necessary fragmentation */
211+
ctx->state->flags |= CALI_ST_SKIP_REDIR_ONCE;
210212
}
211213
#endif
212214

@@ -276,6 +278,7 @@ static CALI_BPF_INLINE int pre_policy_processing(struct cali_tc_ctx *ctx)
276278
if (ip_is_last_frag(ip_hdr(ctx))) {
277279
frags4_remove_ct(ctx);
278280
}
281+
ctx->state->flags |= CALI_ST_IS_FRAG;
279282
goto allow;
280283
}
281284
}
@@ -1292,6 +1295,7 @@ int calico_tc_skb_accepted_entrypoint(struct __sk_buff *skb)
12921295
#ifndef IPVER6
12931296
if (CALI_F_FROM_HOST && ip_is_first_frag(ip_hdr(ctx))) {
12941297
frags4_record_ct(ctx);
1298+
ctx->state->flags |= CALI_ST_IS_FRAG;
12951299
}
12961300
#endif
12971301

@@ -1376,7 +1380,7 @@ int calico_tc_skb_new_flow_entrypoint(struct __sk_buff *skb)
13761380
if (CALI_F_TO_HOST && state->flags & CALI_ST_SKIP_FIB) {
13771381
ct_ctx_nat->flags |= CALI_CT_FLAG_SKIP_FIB;
13781382
}
1379-
if (CALI_F_FROM_HEP && state->flags & CALI_ST_SKIP_REDIR_PEER) {
1383+
if (state->flags & CALI_ST_SKIP_REDIR_PEER) {
13801384
ct_ctx_nat->flags |= CALI_CT_FLAG_SKIP_REDIR_PEER;
13811385
}
13821386
if (CALI_F_TO_WEP) {

felix/bpf-gpl/types.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ enum cali_state_flags {
156156
CALI_ST_LOG_PACKET = 0x400,
157157
/* CALI_ST_SKIP_REDIR_PEER is set when the packet is destined to a local VM workload */
158158
CALI_ST_SKIP_REDIR_PEER = 0x800,
159+
/* CALI_ST_SKIP_REDIR_ONCE skips redirection once for this particular packet */
160+
CALI_ST_SKIP_REDIR_ONCE = 0x1000,
161+
/* CALI_ST_IS_FRAG marks a packet fragment */
162+
CALI_ST_IS_FRAG = 0x2000,
159163
};
160164

161165
struct fwd {

felix/fv/bpf_test.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
440440
options.ExtraEnvVars["FELIX_HEALTHHOST"] = "::"
441441
}
442442

443-
if testOpts.protocol == "tcp" {
443+
if false && testOpts.protocol == "tcp" {
444444
filters := map[string]string{"all": "tcp"}
445445
tcpResetTimeout := api.BPFConntrackTimeout("5s")
446446
felixConfig := api.NewFelixConfiguration()
@@ -1737,9 +1737,18 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
17371737
cc.CheckConnectivity(conntrackChecks(tc.Felixes)...)
17381738
})
17391739

1740-
_ = testOpts.protocol == "udp" && !testOpts.ipv6 && !testOpts.dsr && testOpts.tunnel == "none" &&
1740+
_ = !testOpts.ipv6 && !testOpts.dsr &&
17411741
It("should handle fragmented UDP", func() {
1742-
tcpdump1 := tc.Felixes[1].AttachTCPDump("eth0")
1742+
dev := "eth0"
1743+
switch testOpts.tunnel {
1744+
case "vxlan":
1745+
dev = "vxlan.calico"
1746+
case "ipip":
1747+
dev = "tunl0"
1748+
case "wireguard":
1749+
dev = "wireguard.cali"
1750+
}
1751+
tcpdump1 := tc.Felixes[1].AttachTCPDump(dev)
17431752
tcpdump1.SetLogEnabled(true)
17441753
tcpdump1.AddMatcher("udp-frags", regexp.MustCompile(
17451754
fmt.Sprintf("%s.* > %s.*", w[1][0].IP, w[0][0].IP)))

0 commit comments

Comments
 (0)