Description
Pre-requisites
- A similar question has not been reported before.
- mptcp.dev website does not cover my case.
- The wiki doesn't cover my case.
- This is not a question related to the current behavior, an issue or a feature request: if it is, please use another template even if it is a question, as we will need details about your system (kernel version, config, etc.).
My question
According to my Redis benchmark tests, after enabling MPTCP:
- the throughput decreased by 10% to 20% (e.g. for 1024-byte SETs, 124755 -> 99793 requests/s with the default scheduler, about a 20% drop), and
- only the minimum latency decreased when using the redundant scheduler (net.mptcp.scheduler = bpf_red).
I would like to know whether these conclusions are correct, and where the main overhead comes from.
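One way to narrow down where the extra cycles go would be to profile the server core during a run and compare it against the TCP baseline (a sketch, assuming perf is available; CPU 64 matches the numactl pinning used for redis-server in the benchmark below):
# profile only the core redis-server is pinned to, for 30 s of a benchmark run
perf record -g -C 64 -- sleep 30
# compare the MPTCP profile against the TCP baseline; the differences
# (e.g. mptcp_* functions, extra locking, scheduler callbacks) hint at the overhead
perf report --stdio | head -50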
Hardware Information
[root@ceph3 mptcp]# lscpu
Architecture: aarch64
CPU op-mode(s): 64-bit
Byte Order: Little Endian
CPU(s): 128
On-line CPU(s) list: 0-127
Vendor ID: HiSilicon
BIOS Vendor ID: HiSilicon
Model name: Kunpeng-920
Caches (sum of all):
L1d: 8 MiB (128 instances)
L1i: 8 MiB (128 instances)
L2: 64 MiB (128 instances)
L3: 128 MiB (4 instances)
NUMA:
NUMA node(s): 4
NUMA node0 CPU(s): 0-31
NUMA node1 CPU(s): 32-63
NUMA node2 CPU(s): 64-95
NUMA node3 CPU(s): 96-127
NIC
       client                           server
NIC1   192.168.9.30  --->  NIC3   192.168.9.34
NIC2   192.168.9.31  --/
85:00.0 Ethernet controller: Mellanox Technologies MT27800 Family [ConnectX-5]
Configuration
[root@ceph2 ~]# uname -a
Linux ceph2 6.15.0-rc7-mptcp+ #9 SMP Sat May 24 17:38:07 CST 2025 aarch64 aarch64 aarch64 GNU/Linux
[root@ceph2 ~]# sysctl -a | grep mptcp
crypto.fips_version = 6.15.0-rc7-mptcp+
kernel.osrelease = 6.15.0-rc7-mptcp+
net.ipv4.tcp_available_ulp = mptcp tls
net.mptcp.add_addr_timeout = 120
net.mptcp.allow_join_initial_addr_port = 1
net.mptcp.available_path_managers = kernel userspace
net.mptcp.available_schedulers = default bpf_rr bpf_bkup bpf_first bpf_red
net.mptcp.blackhole_timeout = 3600
net.mptcp.checksum_enabled = 0
net.mptcp.close_timeout = 60
net.mptcp.enabled = 1
net.mptcp.path_manager = kernel
net.mptcp.pm_type = 0
net.mptcp.scheduler = bpf_red
net.mptcp.stale_loss_cnt = 4
net.mptcp.syn_retrans_before_tcp_fallback = 2
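A minimal sketch of how the scheduler can be switched between runs (assuming the bpf_* schedulers are already registered, as net.mptcp.available_schedulers above shows; one way to get the "disable mptcp" baseline is to turn MPTCP off, or simply to run without mptcpize):
# select the packet scheduler used by new MPTCP connections
sysctl -w net.mptcp.scheduler=default     # in-kernel scheduler
sysctl -w net.mptcp.scheduler=bpf_red     # BPF redundant scheduler
# baseline ("disable mptcp"): plain TCP only
sysctl -w net.mptcp.enabled=0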
Benchmark
# server
ip mptcp limits set add_addr_accepted 4 subflows 4
ip mptcp endpoint add 192.168.9.34 signal
mptcpize run numactl -C 64 redis-server /etc/redis.conf --bind 192.168.9.34
# client
ip mptcp limits set add_addr_accepted 4 subflows 4
ip mptcp endpoint add 192.168.9.30 dev $eth0 subflow
ip mptcp endpoint add 192.168.9.31 dev $eth1 subflow
mptcpize run numactl -N 2 redis-benchmark -h 192.168.9.34 -p 6379 -n 1000000 -r 1000 -c 100 --threads 12 -t set,get -d 1024
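To confirm that both client NICs actually carry traffic during a run, a quick check with standard tools (a sketch, run on the client while the benchmark is active):
# list MPTCP connections and their subflows
ss -Mni
# watch path-manager events (subflow establishment/close) in real time
ip mptcp monitor
# MPTCP MIB counters; the MPJoin* counters increasing over a run indicate that
# the extra subflow via the second NIC was established
nstat -az | grep -i MPTcp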
Performance
redis-set, 100 connections (payload size in bytes, throughput in requests/s, latencies in ms)
baseline (MPTCP disabled)
size throughput avg min p50 p95 p99 max
256 133061 0.726 0.187 0.676 1.327 1.375 4.759
1024 124755 0.774 0.187 0.716 1.410 1.458 3.695
8192 64691 1.530 0.269 1.407 2.746 2.874 5.887
32768 30758 3.225 0.563 3.764 4.116 4.268 8.727
net.mptcp.scheduler = default
size throughput avg min p50 p95 p99 max
256 106005 0.921 0.131 0.866 1.684 1.756 4.767
1024 99793 0.981 0.160 0.919 1.802 1.866 4.028
8192 60468 1.637 0.251 1.532 2.980 3.100 5.831
32768 27502 3.610 1.981 3.794 4.892 5.108 9.671
net.mptcp.scheduler = bpf_bkup
size throughput avg min p50 p95 p99 max
256 104160 0.942 0.051 0.527 4.404 8.511 11.924
1024 97414 1.007 0.056 0.551 4.586 8.644 10.836
8192 58094 1.698 0.275 1.583 3.060 3.226 6.103
32768 26001 3.823 1.843 3.202 5.450 6.052 10.948
net.mptcp.scheduler = bpf_first
size throughput avg min p50 p95 p99 max
256 105990 0.922 0.048 0.514 4.380 8.492 11.183
1024 98229 0.996 0.061 0.551 4.567 8.620 11.818
8192 58383 1.693 0.283 1.578 3.047 3.223 6.042
32768 26580 3.736 1.675 3.596 5.204 5.679 10.247
net.mptcp.scheduler = bpf_red
size throughput avg min p50 p95 p99 max
256 99914 0.977 0.067 0.524 4.554 8.615 10.660
1024 90026 1.092 0.048 0.516 4.916 8.762 10.930
8192 49871 1.985 0.291 1.572 3.279 6.103 21.071
32768 12428 3.586 0.461 2.991 6.591 9.388 3000.319
net.mptcp.scheduler = bpf_rr
size throughput avg min p50 p95 p99 max
256 42437 2.344 0.032 0.058 15.879 16.263 19.444
1024 64439 1.566 0.037 0.106 12.159 16.130 18.234
8192 51396 1.922 0.320 1.652 3.314 4.900 22.111
32768 24016 4.138 0.832 3.943 7.444 11.796 51.316
redis-get, 100 connections (payload size in bytes, throughput in requests/s, latencies in ms)
baseline (MPTCP disabled)
size throughput avg min p50 p95 p99 max
256 139383 0.696 0.115 0.647 1.268 1.322 2.170
1024 128844 0.756 0.125 0.700 1.380 1.439 4.090
8192 86762 1.118 0.243 1.023 1.935 2.063 3.844
32768 34603 2.814 0.440 2.596 3.956 6.770 17.943
net.mptcp.scheduler = default
size throughput avg min p50 p95 p99 max
256 108940 0.897 0.120 0.844 1.647 1.711 4.703
1024 100648 0.970 0.176 0.924 1.458 1.874 2.967
8192 74393 1.315 0.288 1.236 1.972 2.479 3.418
32768 31371 3.102 0.493 2.887 4.314 7.050 18.874
net.mptcp.scheduler = bpf_bkup
size throughput avg min p50 p95 p99 max
256 105104 0.929 0.051 0.511 4.460 8.612 10.692
1024 100634 0.976 0.069 0.530 4.554 8.639 10.948
8192 70450 1.388 0.083 0.746 7.311 8.900 11.775
32768 29922 3.248 0.128 2.279 9.226 10.271 21.738
net.mptcp.scheduler = bpf_first
size throughput avg min p50 p95 p99 max
256 104187 0.935 0.051 0.511 4.487 8.607 11.135
1024 98951 0.985 0.059 0.532 4.572 8.642 9.986
8192 69605 1.397 0.104 0.754 5.124 8.892 11.748
32768 30459 3.193 0.120 2.239 9.084 10.228 21.242
net.mptcp.scheduler = bpf_red
size throughput avg min p50 p95 p99 max
256 101520 0.969 0.069 0.519 4.588 8.676 11.554
1024 93593 1.049 0.067 0.554 4.714 8.727 10.114
8192 63371 1.549 0.147 0.834 5.236 8.980 11.274
32768 26481 3.684 0.075 2.898 9.020 10.452 13.991
net.mptcp.scheduler = bpf_rr
size throughput avg min p50 p95 p99 max
256 94243 1.050 0.035 0.114 10.650 16.084 18.052
1024 94282 1.037 0.037 0.199 9.460 16.044 19.738
8192 52108 1.888 0.040 0.098 11.930 15.922 17.375
32768 27839 3.510 0.517 3.319 4.714 7.316 29.018
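For reference, the throughput drop quoted in the question can be reproduced from the redis-set tables above; a small awk sketch using the baseline and default-scheduler rows:
# relative SET-throughput drop of net.mptcp.scheduler=default vs. the
# plain-TCP baseline, per payload size (numbers copied from the tables above)
awk 'BEGIN {
    split("256 1024 8192 32768", size);
    split("133061 124755 64691 30758", base);    # baseline throughput (req/s)
    split("106005 99793 60468 27502", def);      # default scheduler (req/s)
    for (i = 1; i <= 4; i++)
        printf "%6s B: %4.1f%% lower\n", size[i], (base[i] - def[i]) / base[i] * 100;
}'
This works out to roughly 20% for the 256 B and 1 KiB payloads and about 6-11% for 8 KiB and 32 KiB.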