Skip to content

Commit 2de03b4

Browse files
Florian Westphal authored and ummakynes committed
selftests: netfilter: add flowtable test script
Exercises 3 cases:
1. no PMTU discovery (need to frag)
2. no PMTUd + NAT (don't flag packets as invalid from conntrack)
3. PMTU + NAT (need to send icmp error)

The first two cases make sure we handle fragments correctly, i.e. pass them to the classic forwarding path. The third case checks that we offload everything (in the test case, PMTUd will kick in, so all packets should be within the link mtu).

Nftables rules will filter packets that are supposed to be handled by the fast-path.

Signed-off-by: Florian Westphal <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 69aeb53 commit 2de03b4

File tree

2 files changed

+325
-1
lines changed

2 files changed

+325
-1
lines changed

tools/testing/selftests/netfilter/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# Makefile for netfilter selftests
33

44
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
5-
conntrack_icmp_related.sh
5+
conntrack_icmp_related.sh nft_flowtable.sh
66

77
include ../lib.mk
Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates following topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0

# Names of the temporary transfer files. They start out empty so that
# cleanup() can reference them even if the script exits before they
# are created.
ns1in=""
ns2in=""
ns1out=""
ns2out=""

# Saved so that cleanup() can restore the original setting.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

if ! nft --version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without nft tool"
	exit $ksft_skip
fi

if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

# 'command -v' is a shell builtin; which(1) is an external tool that may
# itself be missing, which would cause a bogus SKIP.
if ! command -v nc > /dev/null 2>&1; then
	echo "SKIP: Could not run test without nc (netcat)"
	exit $ksft_skip
fi

# The first namespace doubles as a probe for netns support/permissions.
if ! ip netns add nsr1; then
	echo "SKIP: Could not create net namespace"
	exit $ksft_skip
fi

ip netns add ns1
ip netns add ns2

ip netns add nsr2
# Tear down everything the test created: the four network namespaces
# (ns1, ns2, nsr1, nsr2) and the temporary transfer files, then restore
# the nf_log_all_netns sysctl.
#
# Globals: ns1in, ns1out, ns2in, ns2out (read) - temp files to remove
#          log_netns (read) - sysctl value saved at script start
cleanup() {
	local i

	for i in 1 2; do
		ip netns del ns$i
		ip netns del nsr$i
	done

	rm -f "$ns1in" "$ns1out"
	rm -f "$ns2in" "$ns2out"

	# Only a saved value of 0 is written back. The comparison is a quoted
	# string test so that an empty log_netns (sysctl key unavailable when
	# it was read) is skipped instead of raising a '[' syntax error.
	if [ "$log_netns" = "0" ]; then
		sysctl -q net.netfilter.nf_log_all_netns=$log_netns
	fi
}
trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

# Wire up the chain ns1 <-> nsr1 <-> nsr2 <-> ns2 with veth pairs.
ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2

ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2

# Bring up every interface in both routers.
for i in 1 2; do
	for dev in lo veth0 veth1; do
		ip -net nsr$i link set $dev up
	done
done

# Router addresses on the links facing the end hosts.
ip -net nsr1 addr add 10.0.1.1/24 dev veth0
ip -net nsr1 addr add dead:1::1/64 dev veth0

ip -net nsr2 addr add 10.0.2.1/24 dev veth1
ip -net nsr2 addr add dead:2::1/64 dev veth1

# Use different MTUs so that packets coming from ns1 (large MTU) towards
# ns2 (smaller MTU) have to be pushed to the stack, either to perform
# fragmentation (ip_no_pmtu_disc=1) or to do PMTU discovery (send an ICMP
# error back to the originator).
# ns2 is reached via nsr2 with a smaller mtu, so that the TCPMSS announced
# by both peers is NOT the lowest link mtu.

ip -net nsr1 link set veth0 mtu 9000
ip -net ns1 link set eth0 mtu 9000

ip -net nsr2 link set veth1 mtu 2000
ip -net ns2 link set eth0 mtu 2000

# Transfer net between nsr1 and nsr2.
# These addresses are not used for connections.
ip -net nsr1 addr add 192.168.10.1/24 dev veth1
ip -net nsr1 addr add fee1:2::1/64 dev veth1

ip -net nsr2 addr add 192.168.10.2/24 dev veth0
ip -net nsr2 addr add fee1:2::2/64 dev veth0

for i in 1 2; do
	# Enable IPv4 forwarding on both router interfaces.
	for dev in veth0 veth1; do
		ip netns exec nsr$i sysctl net.ipv4.conf.$dev.forwarding=1 > /dev/null
	done

	# End host ns$i: interfaces up, addresses, default routes via its router.
	ip -net ns$i link set lo up
	ip -net ns$i link set eth0 up
	ip -net ns$i addr add 10.0.$i.99/24 dev eth0
	ip -net ns$i route add default via 10.0.$i.1
	ip -net ns$i addr add dead:$i::99/64 dev eth0
	ip -net ns$i route add default via dead:$i::1
	ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null

	# don't set ip DF bit for first two tests
	ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

# The routers reach the far side through each other.
ip -net nsr1 route add default via 192.168.10.2
ip -net nsr2 route add default via 192.168.10.1
# Install the flowtable and the forward chain in nsr1. The test is
# skipped when nft rejects the ruleset.
if ! ip netns exec nsr1 nft -f - <<EOF
table inet filter {
  flowtable f1 {
     hook ingress priority 0
     devices = { veth0, veth1 }
   }

   chain forward {
      type filter hook forward priority 0; policy drop;

      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
      meta oif "veth1" tcp dport 12345 flow offload @f1 counter

      # use packet size to trigger 'should be offloaded by now'.
      # otherwise, if 'flow offload' expression never offloads, the
      # test will pass.
      tcp dport 12345 meta length gt 200 ct mark set 1 counter

      # this turns off flow offloading internally, so expect packets again
      tcp flags fin,rst ct mark set 0 accept

      # this allows large packets from responder, we need this as long
      # as PMTUd is off.
      # This rule is deleted for the last test, when we expect PMTUd
      # to kick in and ensure all packets meet mtu requirements.
      meta length gt 1500 accept comment something-to-grep-for

      # next line blocks connection w.o. working offload.
      # we only do this for reverse dir, because we expect packets to
      # enter slow path due to MTU mismatch of veth0 and veth1.
      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

      ct state established,related accept

      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
      meta length lt 200 oif "veth1" tcp dport 12345 counter accept

      meta nfproto ipv4 meta l4proto icmp accept
      meta nfproto ipv6 meta l4proto icmpv6 accept
   }
}
EOF
then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi
# Test basic connectivity in both directions before running any transfer.
# A failure here is fatal: nothing else can work without routing.
# (A leftover interactive 'bash' on the first failure path was removed;
# it blocked unattended runs until someone closed the shell.)
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
	echo "ERROR: ns1 cannot reach ns2" 1>&2
	exit 1
fi

if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: ns2 cannot reach ns1" 1>&2
	exit 1
fi

if [ "$ret" -eq 0 ]; then
	echo "PASS: netns routing/connectivity: ns1 can reach ns2"
fi
# Scratch files for the TCP transfers: "in" is the payload fed to nc in
# that namespace, "out" captures what nc received from the peer.
# Stop right away if any of them cannot be created; continuing with an
# empty file name would only fail later in the redirections.
ns1in=$(mktemp) &&
ns1out=$(mktemp) &&
ns2in=$(mktemp) &&
ns2out=$(mktemp)
if [ $? -ne 0 ]; then
	echo "ERROR: could not create temporary files" 1>&2
	exit 1
fi
# Fill a file with a random amount of random data.
#
# The size is a random number of whole KiB (0..8191) plus a random tail
# of 128..1151 bytes, so the file is never empty.
#
# Arguments: $1 - path of the file to (over)write
#            $2 - label of the owner; accepted for the callers' sake, unused
# Globals:   SIZE, TSIZE (written) - TSIZE ends up as the total file size
make_file()
{
	local name=$1

	SIZE=$((RANDOM % (1024 * 8)))
	TSIZE=$((SIZE * 1024))

	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null

	SIZE=$((RANDOM % 1024))
	SIZE=$((SIZE + 128))
	TSIZE=$((TSIZE + SIZE))

	# Append the tail through '>>' so the KiB blocks written above are
	# kept. The previous 'conf=notrunc' operand was a typo that dd
	# rejected (silently, because stderr is discarded), so the tail was
	# never written at all.
	dd if=/dev/urandom bs=1 count=$SIZE 2> /dev/null >> "$name"
}
# Compare a sent file with the file received on the other side.
#
# Arguments: $1 - file that was sent
#            $2 - file that was received
#            $3 - description of the transfer, used in the failure message
# Outputs:   on mismatch, a FAIL line on stderr and 'ls -l' of both files
#            on stdout
# Returns:   0 if the files are identical, 1 otherwise
check_transfer()
{
	# Keep these local: the original assigned them as globals, which
	# clobbered same-named variables of the caller.
	local in=$1
	local out=$2
	local what=$3

	if ! cmp "$in" "$out" > /dev/null 2>&1; then
		echo "FAIL: file mismatch for $what" 1>&2
		ls -l "$in"
		ls -l "$out"
		return 1
	fi

	return 0
}
# Run one bidirectional TCP transfer on port 12345 through the routers
# and verify that both payloads arrive intact.
#
# Arguments: $1 - namespace running the nc client (connects to 10.0.2.99)
#            $2 - namespace running the nc listener
# Globals:   ns1in, ns2in (read) - payloads sent by client / listener
#            ns1out, ns2out (written) - data received by client / listener
# Returns:   0 if both directions transferred correctly, 1 otherwise
#
# Note: the destination address and the file names are fixed, so the
# only meaningful invocation is "test_tcp_forwarding ns1 ns2".
test_tcp_forwarding()
{
	local nsa=$1
	local nsb=$2
	local lret=0
	local lpid cpid

	ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
	lpid=$!

	# Give the listener time to bind before the client connects.
	sleep 1
	ip netns exec "$nsa" nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
	cpid=$!

	# Let the transfer run, then stop whatever is still alive. Either nc
	# may already have exited; kill's complaint about that is not useful.
	sleep 3

	kill "$lpid" 2> /dev/null
	kill "$cpid" 2> /dev/null
	wait

	if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
		lret=1
	fi

	if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
		lret=1
	fi

	return $lret
}
# Generate the random payloads sent by each side.
make_file "$ns1in" "ns1"
make_file "$ns2in" "ns2"

# First test:
# PMTU discovery is off, so nsr1 is expected to fragment packets from
# ns1 to ns2 as needed.
if test_tcp_forwarding ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2"
else
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi
# Remove ns2's default routes: ns2 can then no longer reach ns1 directly
# and depends on nsr1 masquerading ns1's traffic. Only nsr1's
# transfer-net address (192.168.10.1) stays reachable from ns2.
ip -net ns2 route del default via 10.0.2.1
ip -net ns2 route del default via dead:2::1
ip -net ns2 route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same as the first one, but with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
table ip nat {
   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" masquerade
   }
}
EOF

if test_tcp_forwarding ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi
# Third test:
# Same as second test, but with PMTU discovery enabled.
#
# First drop the rule that accepts packets larger than 1500 bytes; it is
# located through its comment. 'nft -a' appends "# handle <n>" to every
# rule, so the field after '#' is the "handle <n>" argument pair.
handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

# $handle is intentionally unquoted: it must split into two words.
if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
	echo "FAIL: Could not delete large-packet accept rule"
	exit 1
fi

# Re-enable PMTU discovery on both end hosts.
ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

if test_tcp_forwarding ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec nsr1 nft list ruleset
	# Record the failure like the first two tests do; without this the
	# script printed FAIL but still exited with status 0.
	ret=1
fi

exit $ret

0 commit comments

Comments
 (0)