|
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates following topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# or Responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0

# Paths of the transfer files.  They start out empty so that the exit
# handler's "rm -f" is harmless if the script stops before they are created.
ns1in=""
ns2in=""
ns1out=""
ns2out=""

# Saved so the exit handler can put the original setting back.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

if ! nft --version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without nft tool"
	exit $ksft_skip
fi

if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

# "command -v" is a shell builtin; "which" is an external tool that is not
# installed everywhere and would turn a usable setup into a bogus SKIP.
if ! command -v nc > /dev/null 2>&1; then
	echo "SKIP: Could not run test without nc (netcat)"
	exit $ksft_skip
fi

# The first namespace doubles as the "can we create namespaces at all" probe.
if ! ip netns add nsr1; then
	echo "SKIP: Could not create net namespace"
	exit $ksft_skip
fi

ip netns add ns1
ip netns add ns2

ip netns add nsr2
| 52 | + |
# Exit handler: delete the four test namespaces, remove the transfer files
# and restore the nf_log_all_netns sysctl.
# Globals (read): ns1in, ns1out, ns2in, ns2out, log_netns
cleanup() {
	local i

	for i in 1 2; do
		ip netns del "ns$i"
		ip netns del "nsr$i"
	done

	rm -f "$ns1in" "$ns1out"
	rm -f "$ns2in" "$ns2out"

	# The script switches the sysctl to 1, so it only has to be undone when
	# the saved value was 0.  A string comparison is used on purpose: if the
	# saved value is empty (sysctl could not be read) a numeric "[ -eq ]"
	# would abort with "unary operator expected".
	if [ "$log_netns" = "0" ]; then
		sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
	fi
}
| 64 | + |
# From here on every exit path tears the namespaces down again.
trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

# veth pairs forming the chain ns1 <-> nsr1 <-> nsr2 <-> ns2
ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2

ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2

for router in nsr1 nsr2; do
	for dev in lo veth0 veth1; do
		ip -net "$router" link set "$dev" up
	done
done

# host-facing router addresses
ip -net nsr1 addr add 10.0.1.1/24 dev veth0
ip -net nsr1 addr add dead:1::1/64 dev veth0

ip -net nsr2 addr add 10.0.2.1/24 dev veth1
ip -net nsr2 addr add dead:2::1/64 dev veth1

# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
# or to do PMTU discovery (send ICMP error back to originator).
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.

ip -net nsr1 link set veth0 mtu 9000
ip -net ns1 link set eth0 mtu 9000

ip -net nsr2 link set veth1 mtu 2000
ip -net ns2 link set eth0 mtu 2000

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net nsr1 addr add 192.168.10.1/24 dev veth1
ip -net nsr1 addr add fee1:2::1/64 dev veth1

ip -net nsr2 addr add 192.168.10.2/24 dev veth0
ip -net nsr2 addr add fee1:2::2/64 dev veth0

# routers: enable IPv4 forwarding on both legs
for i in 1 2; do
	for dev in veth0 veth1; do
		ip netns exec "nsr$i" sysctl "net.ipv4.conf.$dev.forwarding=1" > /dev/null
	done
done

# end hosts: addresses, default routes and TCP/PMTU knobs
for i in 1 2; do
	host="ns$i"

	ip -net "$host" link set lo up
	ip -net "$host" link set eth0 up
	ip -net "$host" addr add "10.0.$i.99/24" dev eth0
	ip -net "$host" route add default via "10.0.$i.1"
	ip -net "$host" addr add "dead:$i::99/64" dev eth0
	ip -net "$host" route add default via "dead:$i::1"
	ip netns exec "$host" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null

	# don't set ip DF bit for first two tests
	ip netns exec "$host" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

# the routers reach the far side through each other's transfer-net address
ip -net nsr1 route add default via 192.168.10.2
ip -net nsr2 route add default via 192.168.10.1
| 124 | + |
# Install the forward-chain ruleset with the flowtable on Router1.
# A ruleset that fails to load is reported as SKIP, not as a failure.
if ! ip netns exec nsr1 nft -f - <<EOF
table inet filter {
	flowtable f1 {
		hook ingress priority 0
		devices = { veth0, veth1 }
	}

	chain forward {
		type filter hook forward priority 0; policy drop;

		# flow offloaded? Tag ct with mark 1, so we can detect when it fails.
		meta oif "veth1" tcp dport 12345 flow offload @f1 counter

		# use packet size to trigger 'should be offloaded by now'.
		# otherwise, if 'flow offload' expression never offloads, the
		# test will pass.
		tcp dport 12345 meta length gt 200 ct mark set 1 counter

		# this turns off flow offloading internally, so expect packets again
		tcp flags fin,rst ct mark set 0 accept

		# this allows large packets from responder, we need this as long
		# as PMTUd is off.
		# This rule is deleted for the last test, when we expect PMTUd
		# to kick in and ensure all packets meet mtu requirements.
		meta length gt 1500 accept comment something-to-grep-for

		# next line blocks connection w.o. working offload.
		# we only do this for reverse dir, because we expect packets to
		# enter slow path due to MTU mismatch of veth0 and veth1.
		tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

		ct state established,related accept

		# for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
		meta length lt 200 oif "veth1" tcp dport 12345 counter accept

		meta nfproto ipv4 meta l4proto icmp accept
		meta nfproto ipv6 meta l4proto icmpv6 accept
	}
}
EOF
then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi
| 172 | + |
# test basic connectivity
# A failure here is fatal.  No interactive shell may be started on this
# path: the test runs unattended and would block forever.
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
	echo "ERROR: ns1 cannot reach ns2" 1>&2
	exit 1
fi

if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: ns2 cannot reach ns1" 1>&2
	exit 1
fi

# Both failure paths above exit, so getting here means the pings worked.
echo "PASS: netns routing/connectivity: ns1 can reach ns2"

# Input/output files for the netcat transfers; removed by the exit handler.
ns1in=$(mktemp)
ns1out=$(mktemp)
ns2in=$(mktemp)
ns2out=$(mktemp)
| 195 | + |
# Fill a file with random data of random length.
# Arguments: $1 - path of the file to (over)write
#            $2 - label of the owning namespace; accepted for the existing
#                 callers but not used
# The file gets (RANDOM % 8192) blocks of 1024 bytes followed by a tail of
# 128..1151 single bytes.
make_file()
{
	local name=$1
	local blocks tail_bytes

	blocks=$((RANDOM % (1024 * 8)))
	dd if=/dev/urandom of="$name" bs=1024 count="$blocks" 2> /dev/null

	tail_bytes=$((RANDOM % 1024 + 128))
	# Append via ">>" so the tail extends the file.  Writing with of= would
	# either truncate it or, with conv=notrunc, overwrite the first bytes.
	dd if=/dev/urandom bs=1 count="$tail_bytes" 2> /dev/null >> "$name"
}
| 211 | + |
# Compare a sent file with what the peer received.
# Arguments: $1 - file that was sent
#            $2 - file that was received
#            $3 - label of the direction, used in the failure message
# Returns:   0 if both files are byte-identical, 1 otherwise (a FAIL line
#            and a listing of both files are written to stderr)
check_transfer()
{
	local sent=$1
	local received=$2
	local what=$3

	if ! cmp "$sent" "$received" > /dev/null 2>&1; then
		echo "FAIL: file mismatch for $what" 1>&2
		# sizes of both files help to tell truncation from corruption
		ls -l "$sent" 1>&2
		ls -l "$received" 1>&2
		return 1
	fi

	return 0
}
| 228 | + |
# Run one bidirectional netcat transfer on TCP port 12345 and verify both
# directions.
# Arguments: $1 - namespace running the client (connects to 10.0.2.99)
#            $2 - namespace running the listener
# Globals (read): ns1in, ns1out, ns2in, ns2out
# Returns:   0 if both directions were transferred intact, 1 otherwise
test_tcp_forwarding()
{
	local nsa=$1
	local nsb=$2
	local lret=0
	local lpid cpid

	ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
	lpid=$!

	# give the listener time to bind before the client connects
	sleep 1
	ip netns exec "$nsa" nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
	cpid=$!

	sleep 3

	# Either netcat may already have exited on its own; a failing kill is
	# expected then and must not add noise to the test output.
	kill "$lpid" "$cpid" 2> /dev/null
	wait "$lpid" "$cpid"

	if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
		lret=1
	fi

	if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
		lret=1
	fi

	return $lret
}
| 260 | + |
make_file "$ns1in" "ns1"
make_file "$ns2in" "ns2"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
if test_tcp_forwarding ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2"
else
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi

# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
ip -net ns2 route del default via 10.0.2.1
ip -net ns2 route del default via dead:2::1
ip -net ns2 route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0; policy accept;
		meta oifname "veth1" masquerade
	}
}
EOF

if test_tcp_forwarding ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi

# Third test:
# Same as second test, but with PMTU discovery enabled.
# The large-packet accept rule is located through its comment; everything
# after the '#' in the "nft -a" listing is the text " handle <N>".
handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

# $handle is deliberately unquoted: it must split into the two words
# "handle" and "<N>" for nft.
# shellcheck disable=SC2086
if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
	echo "FAIL: Could not delete large-packet accept rule"
	exit 1
fi

ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

if test_tcp_forwarding ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec nsr1 nft list ruleset
	# without this the script would print FAIL and still exit 0
	ret=1
fi

exit $ret
0 commit comments