Skip to content

Commit 7bbb38f

Browse files
author
Alexei Starovoitov
committed
Merge branch 'support-skf_net_off-and-skf_ll_off-on-skb-frags'
Willem de Bruijn says: ==================== support SKF_NET_OFF and SKF_LL_OFF on skb frags From: Willem de Bruijn <[email protected]> Address a longstanding issue that may lead to missed packets depending on system configuration. Ensure that reading from packet contents works regardless of skb geometry, also when using the special SKF_.. negative offsets to offset from L2 or L3 header. Patch 2 is the selftest for the fix. v2->v3 - do not remove bpf_internal_load_pointer_neg_helper, because it is still used in the sparc32 JIT v1->v2 - introduce bpf_skb_load_helper_convert_offset to avoid open coding - selftest: add comment why early demux must be disabled v2: https://lore.kernel.org/netdev/[email protected]/ v1: https://lore.kernel.org/netdev/[email protected]/ ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 9bae8f4 + fcd7132 commit 7bbb38f

File tree

5 files changed

+321
-36
lines changed

5 files changed

+321
-36
lines changed

net/core/filter.c

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -218,24 +218,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
218218
return 0;
219219
}
220220

221+
static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset)
222+
{
223+
if (likely(offset >= 0))
224+
return offset;
225+
226+
if (offset >= SKF_NET_OFF)
227+
return offset - SKF_NET_OFF + skb_network_offset(skb);
228+
229+
if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb))
230+
return offset - SKF_LL_OFF + skb_mac_offset(skb);
231+
232+
return INT_MIN;
233+
}
234+
221235
/* Diff hunk for bpf_skb_load_helper_8: a line following an "N-" marker is
 * removed (pre-image), a line following an "N+" marker is added (post-image).
 *
 * Post-image behavior: offset is first passed through
 * bpf_skb_load_helper_convert_offset(); an INT_MIN result returns -EFAULT.
 * Otherwise the byte is read straight from data + offset when
 * headlen - offset >= len, else it is fetched with skb_copy_bits(), and a
 * failed copy returns -EFAULT.
 */
BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
222236
data, int, headlen, int, offset)
223237
{
224-
u8 tmp, *ptr;
238+
u8 tmp;
225239
const int len = sizeof(tmp);
226240

227-
if (offset >= 0) {
228-
if (headlen - offset >= len)
229-
return *(u8 *)(data + offset);
230-
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
231-
return tmp;
232-
} else {
233-
ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
234-
if (likely(ptr))
235-
return *(u8 *)ptr;
236-
}
241+
offset = bpf_skb_load_helper_convert_offset(skb, offset);
242+
if (offset == INT_MIN)
243+
return -EFAULT;
237244

238-
return -EFAULT;
245+
if (headlen - offset >= len)
246+
return *(u8 *)(data + offset);
247+
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
248+
return tmp;
249+
else
250+
return -EFAULT;
239251
}
240252

241253
BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
@@ -248,21 +260,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
248260
/* Diff hunk for bpf_skb_load_helper_16: a line following an "N-" marker is
 * removed (pre-image), a line following an "N+" marker is added (post-image).
 *
 * Post-image behavior: offset is first passed through
 * bpf_skb_load_helper_convert_offset(); an INT_MIN result returns -EFAULT.
 * Otherwise the big-endian 16-bit value is read with get_unaligned_be16()
 * from data + offset when headlen - offset >= len, else it is copied out
 * with skb_copy_bits() and converted with be16_to_cpu(); a failed copy
 * returns -EFAULT.
 */
BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
249261
data, int, headlen, int, offset)
250262
{
251-
__be16 tmp, *ptr;
263+
__be16 tmp;
252264
const int len = sizeof(tmp);
253265

254-
if (offset >= 0) {
255-
if (headlen - offset >= len)
256-
return get_unaligned_be16(data + offset);
257-
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
258-
return be16_to_cpu(tmp);
259-
} else {
260-
ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
261-
if (likely(ptr))
262-
return get_unaligned_be16(ptr);
263-
}
266+
offset = bpf_skb_load_helper_convert_offset(skb, offset);
267+
if (offset == INT_MIN)
268+
return -EFAULT;
264269

265-
return -EFAULT;
270+
if (headlen - offset >= len)
271+
return get_unaligned_be16(data + offset);
272+
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
273+
return be16_to_cpu(tmp);
274+
else
275+
return -EFAULT;
266276
}
267277

268278
BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
@@ -275,21 +285,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
275285
/* Diff hunk for bpf_skb_load_helper_32: a line following an "N-" marker is
 * removed (pre-image), a line following an "N+" marker is added (post-image).
 *
 * Post-image behavior: offset is first passed through
 * bpf_skb_load_helper_convert_offset(); an INT_MIN result returns -EFAULT.
 * Otherwise the big-endian 32-bit value is read with get_unaligned_be32()
 * from data + offset when headlen - offset >= len, else it is copied out
 * with skb_copy_bits() and converted with be32_to_cpu(); a failed copy
 * returns -EFAULT.
 */
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
276286
data, int, headlen, int, offset)
277287
{
278-
__be32 tmp, *ptr;
288+
__be32 tmp;
279289
const int len = sizeof(tmp);
280290

281-
if (likely(offset >= 0)) {
282-
if (headlen - offset >= len)
283-
return get_unaligned_be32(data + offset);
284-
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
285-
return be32_to_cpu(tmp);
286-
} else {
287-
ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
288-
if (likely(ptr))
289-
return get_unaligned_be32(ptr);
290-
}
291+
offset = bpf_skb_load_helper_convert_offset(skb, offset);
292+
if (offset == INT_MIN)
293+
return -EFAULT;
291294

292-
return -EFAULT;
295+
if (headlen - offset >= len)
296+
return get_unaligned_be32(data + offset);
297+
if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
298+
return be32_to_cpu(tmp);
299+
else
300+
return -EFAULT;
293301
}
294302

295303
BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,

tools/testing/selftests/net/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ scm_rights
3939
sk_bind_sendto_listen
4040
sk_connect_zero_addr
4141
sk_so_peek_off
42+
skf_net_off
4243
socket
4344
so_incoming_cpu
4445
so_netns_cookie

tools/testing/selftests/net/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh
106106
TEST_PROGS += busy_poll_test.sh
107107
TEST_GEN_PROGS += proc_net_pktgen
108108
TEST_PROGS += lwt_dst_cache_ref_loop.sh
109+
TEST_PROGS += skf_net_off.sh
110+
TEST_GEN_FILES += skf_net_off
109111

110112
# YNL files, must be before "include ..lib.mk"
111113
YNL_GEN_FILES := busy_poller netlink-dumps
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
/* Open a tun device.
4+
*
5+
* [modifications: use IFF_NAPI_FRAGS, add sk filter]
6+
*
7+
* Expects the device to have been configured previously, e.g.:
8+
* sudo ip tuntap add name tap1 mode tap
9+
* sudo ip link set tap1 up
10+
* sudo ip link set dev tap1 addr 02:00:00:00:00:01
11+
* sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
12+
*
13+
* And to avoid premature pskb_may_pull:
14+
*
15+
* sudo ethtool -K tap1 gro off
16+
* sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux'
17+
*/
18+
19+
#define _GNU_SOURCE
20+
21+
#include <arpa/inet.h>
22+
#include <errno.h>
23+
#include <error.h>
24+
#include <fcntl.h>
25+
#include <getopt.h>
26+
#include <linux/filter.h>
27+
#include <linux/if.h>
28+
#include <linux/if_packet.h>
29+
#include <linux/if_tun.h>
30+
#include <linux/ipv6.h>
31+
#include <netinet/if_ether.h>
32+
#include <netinet/in.h>
33+
#include <netinet/ip.h>
34+
#include <netinet/ip6.h>
35+
#include <netinet/udp.h>
36+
#include <poll.h>
37+
#include <signal.h>
38+
#include <stdbool.h>
39+
#include <stddef.h>
40+
#include <stdio.h>
41+
#include <stdlib.h>
42+
#include <string.h>
43+
#include <sys/ioctl.h>
44+
#include <sys/socket.h>
45+
#include <sys/poll.h>
46+
#include <sys/types.h>
47+
#include <sys/uio.h>
48+
#include <unistd.h>
49+
50+
/* Set by -f: raw_open() attaches the BPF socket filter to the raw socket. */
static bool cfg_do_filter;
51+
/* Set by -F: tun_open() additionally requests IFF_NAPI | IFF_NAPI_FRAGS. */
static bool cfg_do_frags;
52+
/* UDP destination port written by tun_write() and matched by the filter. */
static int cfg_dst_port = 8000;
53+
/* Tap interface name given with -i; parse_opts() fails if it is missing. */
static char *cfg_ifname;
54+
55+
static int tun_open(const char *tun_name)
56+
{
57+
struct ifreq ifr = {0};
58+
int fd, ret;
59+
60+
fd = open("/dev/net/tun", O_RDWR);
61+
if (fd == -1)
62+
error(1, errno, "open /dev/net/tun");
63+
64+
ifr.ifr_flags = IFF_TAP;
65+
if (cfg_do_frags)
66+
ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
67+
68+
strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1);
69+
70+
ret = ioctl(fd, TUNSETIFF, &ifr);
71+
if (ret)
72+
error(1, ret, "ioctl TUNSETIFF");
73+
74+
return fd;
75+
}
76+
77+
static void sk_set_filter(int fd)
78+
{
79+
const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt);
80+
const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest);
81+
82+
/* Filter UDP packets with destination port cfg_dst_port */
83+
struct sock_filter filter_code[] = {
84+
BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
85+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
86+
BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto),
87+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2),
88+
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport),
89+
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0),
90+
BPF_STMT(BPF_RET + BPF_K, 0),
91+
BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
92+
};
93+
94+
struct sock_fprog filter = {
95+
sizeof(filter_code) / sizeof(filter_code[0]),
96+
filter_code,
97+
};
98+
99+
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
100+
error(1, errno, "setsockopt attach filter");
101+
}
102+
103+
static int raw_open(void)
104+
{
105+
int fd;
106+
107+
fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP);
108+
if (fd == -1)
109+
error(1, errno, "socket raw (udp)");
110+
111+
if (cfg_do_filter)
112+
sk_set_filter(fd);
113+
114+
return fd;
115+
}
116+
117+
static void tun_write(int fd)
118+
{
119+
const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
120+
const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
121+
struct tun_pi pi = {0};
122+
struct ipv6hdr ip6h = {0};
123+
struct udphdr uh = {0};
124+
struct ethhdr eth = {0};
125+
uint32_t payload;
126+
struct iovec iov[5];
127+
int ret;
128+
129+
pi.proto = htons(ETH_P_IPV6);
130+
131+
memcpy(eth.h_source, eth_src, sizeof(eth_src));
132+
memcpy(eth.h_dest, eth_dst, sizeof(eth_dst));
133+
eth.h_proto = htons(ETH_P_IPV6);
134+
135+
ip6h.version = 6;
136+
ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t));
137+
ip6h.nexthdr = IPPROTO_UDP;
138+
ip6h.hop_limit = 8;
139+
if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1)
140+
error(1, errno, "inet_pton src");
141+
if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1)
142+
error(1, errno, "inet_pton src");
143+
144+
uh.source = htons(8000);
145+
uh.dest = htons(cfg_dst_port);
146+
uh.len = ip6h.payload_len;
147+
uh.check = 0;
148+
149+
payload = htonl(0xABABABAB); /* Covered in IPv6 length */
150+
151+
iov[0].iov_base = &pi;
152+
iov[0].iov_len = sizeof(pi);
153+
iov[1].iov_base = &eth;
154+
iov[1].iov_len = sizeof(eth);
155+
iov[2].iov_base = &ip6h;
156+
iov[2].iov_len = sizeof(ip6h);
157+
iov[3].iov_base = &uh;
158+
iov[3].iov_len = sizeof(uh);
159+
iov[4].iov_base = &payload;
160+
iov[4].iov_len = sizeof(payload);
161+
162+
ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0]));
163+
if (ret <= 0)
164+
error(1, errno, "writev");
165+
}
166+
167+
/* Receive one datagram from raw socket @fd and print its first payload word.
 *
 * A 100 ms receive timeout is set first. The message is scattered into a UDP
 * header and a payload buffer; it must be exactly one UDP header plus one
 * 32-bit payload word long. Exits through error() on a receive failure or an
 * unexpected length.
 */
static void raw_read(int fd)
{
	struct timeval tv = { .tv_usec = 100 * 1000 };
	struct msghdr msg = {0};
	struct iovec iov[2];
	struct udphdr uh;
	uint32_t payload[2];
	int ret;

	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
		error(1, errno, "setsockopt rcvtimeo udp");

	iov[0].iov_base = &uh;
	iov[0].iov_len = sizeof(uh);

	/* The payload buffer is larger than expected so that an oversized
	 * packet shows up as a length mismatch below.
	 */
	iov[1].iov_base = payload;
	iov[1].iov_len = sizeof(payload);

	msg.msg_iov = iov;
	msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);

	ret = recvmsg(fd, &msg, 0);
	if (ret <= 0)
		error(1, errno, "read raw");
	/* No syscall failed on this path, so errno is stale: pass 0 to keep
	 * error() from appending an unrelated strerror() text.
	 */
	if (ret != sizeof(uh) + sizeof(payload[0]))
		error(1, 0, "read raw: len=%d\n", ret);

	fprintf(stderr, "raw recv: 0x%x\n", payload[0]);
}
196+
197+
static void parse_opts(int argc, char **argv)
198+
{
199+
int c;
200+
201+
while ((c = getopt(argc, argv, "fFi:")) != -1) {
202+
switch (c) {
203+
case 'f':
204+
cfg_do_filter = true;
205+
printf("bpf filter enabled\n");
206+
break;
207+
case 'F':
208+
cfg_do_frags = true;
209+
printf("napi frags mode enabled\n");
210+
break;
211+
case 'i':
212+
cfg_ifname = optarg;
213+
break;
214+
default:
215+
error(1, 0, "unknown option %c", optopt);
216+
break;
217+
}
218+
}
219+
220+
if (!cfg_ifname)
221+
error(1, 0, "must specify tap interface name (-i)");
222+
}
223+
224+
int main(int argc, char **argv)
225+
{
226+
int fdt, fdr;
227+
228+
parse_opts(argc, argv);
229+
230+
fdr = raw_open();
231+
fdt = tun_open(cfg_ifname);
232+
233+
tun_write(fdt);
234+
raw_read(fdr);
235+
236+
if (close(fdt))
237+
error(1, errno, "close tun");
238+
if (close(fdr))
239+
error(1, errno, "close udp");
240+
241+
fprintf(stderr, "OK\n");
242+
return 0;
243+
}
244+

0 commit comments

Comments
 (0)