Skip to content

Commit 6a5ef90

Browse files
committed
Merge branch 'faster-soreuseport'
Craig Gallek says:

====================
Faster SO_REUSEPORT

This series contains two optimizations for the SO_REUSEPORT feature:
faster lookup when selecting a socket for an incoming packet, and the
ability to select the socket from the group using a BPF program.

This series only includes the UDP path. I plan to submit a follow-up
including the TCP path if the implementation in this series is acceptable.

Changes in v4:
- pskb_may_pull is unnecessary with pskb_pull (per Alexei Starovoitov)

Changes in v3:
- skb_pull_inline -> pskb_pull (per Alexei Starovoitov)
- reuseport_attach* -> sk_reuseport_attach* and simple return statement
  syntax change (per Daniel Borkmann)

Changes in v2:
- Fix ARM build; remove unnecessary include.
- Handle case where protocol header is not in linear section
  (per Alexei Starovoitov).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents ebb3cf4 + 3ca8e40 commit 6a5ef90

File tree

29 files changed

+1076
-69
lines changed

29 files changed

+1076
-69
lines changed

arch/alpha/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,7 @@
9292
#define SO_ATTACH_BPF 50
9393
#define SO_DETACH_BPF SO_DETACH_FILTER
9494

95+
#define SO_ATTACH_REUSEPORT_CBPF 51
96+
#define SO_ATTACH_REUSEPORT_EBPF 52
97+
9598
#endif /* _UAPI_ASM_SOCKET_H */

arch/avr32/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,7 @@
8585
#define SO_ATTACH_BPF 50
8686
#define SO_DETACH_BPF SO_DETACH_FILTER
8787

88+
#define SO_ATTACH_REUSEPORT_CBPF 51
89+
#define SO_ATTACH_REUSEPORT_EBPF 52
90+
8891
#endif /* _UAPI__ASM_AVR32_SOCKET_H */

arch/frv/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,5 +85,8 @@
8585
#define SO_ATTACH_BPF 50
8686
#define SO_DETACH_BPF SO_DETACH_FILTER
8787

88+
#define SO_ATTACH_REUSEPORT_CBPF 51
89+
#define SO_ATTACH_REUSEPORT_EBPF 52
90+
8891
#endif /* _ASM_SOCKET_H */
8992

arch/ia64/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,7 @@
9494
#define SO_ATTACH_BPF 50
9595
#define SO_DETACH_BPF SO_DETACH_FILTER
9696

97+
#define SO_ATTACH_REUSEPORT_CBPF 51
98+
#define SO_ATTACH_REUSEPORT_EBPF 52
99+
97100
#endif /* _ASM_IA64_SOCKET_H */

arch/m32r/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,7 @@
8585
#define SO_ATTACH_BPF 50
8686
#define SO_DETACH_BPF SO_DETACH_FILTER
8787

88+
#define SO_ATTACH_REUSEPORT_CBPF 51
89+
#define SO_ATTACH_REUSEPORT_EBPF 52
90+
8891
#endif /* _ASM_M32R_SOCKET_H */

arch/mips/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,7 @@
103103
#define SO_ATTACH_BPF 50
104104
#define SO_DETACH_BPF SO_DETACH_FILTER
105105

106+
#define SO_ATTACH_REUSEPORT_CBPF 51
107+
#define SO_ATTACH_REUSEPORT_EBPF 52
108+
106109
#endif /* _UAPI_ASM_SOCKET_H */

arch/mn10300/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,7 @@
8585
#define SO_ATTACH_BPF 50
8686
#define SO_DETACH_BPF SO_DETACH_FILTER
8787

88+
#define SO_ATTACH_REUSEPORT_CBPF 51
89+
#define SO_ATTACH_REUSEPORT_EBPF 52
90+
8891
#endif /* _ASM_SOCKET_H */

arch/parisc/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,7 @@
8484
#define SO_ATTACH_BPF 0x402B
8585
#define SO_DETACH_BPF SO_DETACH_FILTER
8686

87+
#define SO_ATTACH_REUSEPORT_CBPF 0x402C
88+
#define SO_ATTACH_REUSEPORT_EBPF 0x402D
89+
8790
#endif /* _UAPI_ASM_SOCKET_H */

arch/powerpc/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,7 @@
9292
#define SO_ATTACH_BPF 50
9393
#define SO_DETACH_BPF SO_DETACH_FILTER
9494

95+
#define SO_ATTACH_REUSEPORT_CBPF 51
96+
#define SO_ATTACH_REUSEPORT_EBPF 52
97+
9598
#endif /* _ASM_POWERPC_SOCKET_H */

arch/s390/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,7 @@
9191
#define SO_ATTACH_BPF 50
9292
#define SO_DETACH_BPF SO_DETACH_FILTER
9393

94+
#define SO_ATTACH_REUSEPORT_CBPF 51
95+
#define SO_ATTACH_REUSEPORT_EBPF 52
96+
9497
#endif /* _ASM_SOCKET_H */

arch/sparc/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@
8181
#define SO_ATTACH_BPF 0x0034
8282
#define SO_DETACH_BPF SO_DETACH_FILTER
8383

84+
#define SO_ATTACH_REUSEPORT_CBPF 0x0035
85+
#define SO_ATTACH_REUSEPORT_EBPF 0x0036
86+
8487
/* Security levels - as per NRL IPv6 - don't actually do anything */
8588
#define SO_SECURITY_AUTHENTICATION 0x5001
8689
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002

arch/xtensa/include/uapi/asm/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,7 @@
9696
#define SO_ATTACH_BPF 50
9797
#define SO_DETACH_BPF SO_DETACH_FILTER
9898

99+
#define SO_ATTACH_REUSEPORT_CBPF 51
100+
#define SO_ATTACH_REUSEPORT_EBPF 52
101+
99102
#endif /* _XTENSA_SOCKET_H */

include/linux/filter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp);
447447

448448
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
449449
int sk_attach_bpf(u32 ufd, struct sock *sk);
450+
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
451+
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
450452
int sk_detach_filter(struct sock *sk);
451453
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
452454
unsigned int len);

include/net/addrconf.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
8787
u32 banned_flags);
8888
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
8989
u32 banned_flags);
90-
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2);
90+
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
91+
bool match_wildcard);
9192
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
9293
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
9394

include/net/sock.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ struct cg_proto;
318318
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
319319
* @sk_backlog_rcv: callback to process the backlog
320320
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
321+
* @sk_reuseport_cb: reuseport group container
321322
*/
322323
struct sock {
323324
/*
@@ -453,6 +454,7 @@ struct sock {
453454
int (*sk_backlog_rcv)(struct sock *sk,
454455
struct sk_buff *skb);
455456
void (*sk_destruct)(struct sock *sk);
457+
struct sock_reuseport __rcu *sk_reuseport_cb;
456458
};
457459

458460
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))

include/net/sock_reuseport.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#ifndef _SOCK_REUSEPORT_H
2+
#define _SOCK_REUSEPORT_H
3+
4+
#include <linux/filter.h>
5+
#include <linux/skbuff.h>
6+
#include <linux/types.h>
7+
#include <net/sock.h>
8+
9+
struct sock_reuseport {
10+
struct rcu_head rcu;
11+
12+
u16 max_socks; /* length of socks */
13+
u16 num_socks; /* elements in socks */
14+
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
15+
struct sock *socks[0]; /* array of sock pointers */
16+
};
17+
18+
extern int reuseport_alloc(struct sock *sk);
19+
extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
20+
extern void reuseport_detach_sock(struct sock *sk);
21+
extern struct sock *reuseport_select_sock(struct sock *sk,
22+
u32 hash,
23+
struct sk_buff *skb,
24+
int hdr_len);
25+
extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
26+
struct bpf_prog *prog);
27+
28+
#endif /* _SOCK_REUSEPORT_H */

include/net/udp.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
191191
}
192192

193193
int udp_lib_get_port(struct sock *sk, unsigned short snum,
194-
int (*)(const struct sock *, const struct sock *),
194+
int (*)(const struct sock *, const struct sock *, bool),
195195
unsigned int hash2_nulladdr);
196196

197197
u32 udp_flow_hashrnd(void);
@@ -258,15 +258,16 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
258258
__be32 daddr, __be16 dport, int dif);
259259
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
260260
__be32 daddr, __be16 dport, int dif,
261-
struct udp_table *tbl);
261+
struct udp_table *tbl, struct sk_buff *skb);
262262
struct sock *udp6_lib_lookup(struct net *net,
263263
const struct in6_addr *saddr, __be16 sport,
264264
const struct in6_addr *daddr, __be16 dport,
265265
int dif);
266266
struct sock *__udp6_lib_lookup(struct net *net,
267267
const struct in6_addr *saddr, __be16 sport,
268268
const struct in6_addr *daddr, __be16 dport,
269-
int dif, struct udp_table *tbl);
269+
int dif, struct udp_table *tbl,
270+
struct sk_buff *skb);
270271

271272
/*
272273
* SNMP statistics for UDP and UDP-Lite

include/uapi/asm-generic/socket.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,7 @@
8787
#define SO_ATTACH_BPF 50
8888
#define SO_DETACH_BPF SO_DETACH_FILTER
8989

90+
#define SO_ATTACH_REUSEPORT_CBPF 51
91+
#define SO_ATTACH_REUSEPORT_EBPF 52
92+
9093
#endif /* __ASM_GENERIC_SOCKET_H */

net/core/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
99

1010
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
1111
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
12-
sock_diag.o dev_ioctl.o tso.o
12+
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
1313

1414
obj-$(CONFIG_XFRM) += flow.o
1515
obj-y += net-sysfs.o

net/core/filter.c

Lines changed: 99 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include <net/cls_cgroup.h>
5151
#include <net/dst_metadata.h>
5252
#include <net/dst.h>
53+
#include <net/sock_reuseport.h>
5354

5455
/**
5556
* sk_filter - run a packet through a socket filter
@@ -1167,51 +1168,83 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
11671168
return 0;
11681169
}
11691170

1170-
/**
1171-
* sk_attach_filter - attach a socket filter
1172-
* @fprog: the filter program
1173-
* @sk: the socket to use
1174-
*
1175-
* Attach the user's filter code. We first run some sanity checks on
1176-
* it to make sure it does not explode on us later. If an error
1177-
* occurs or there is insufficient memory for the filter a negative
1178-
* errno code is returned. On success the return is zero.
1179-
*/
1180-
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1171+
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1172+
{
1173+
struct bpf_prog *old_prog;
1174+
int err;
1175+
1176+
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1177+
return -ENOMEM;
1178+
1179+
if (sk_unhashed(sk)) {
1180+
err = reuseport_alloc(sk);
1181+
if (err)
1182+
return err;
1183+
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
1184+
/* The socket wasn't bound with SO_REUSEPORT */
1185+
return -EINVAL;
1186+
}
1187+
1188+
old_prog = reuseport_attach_prog(sk, prog);
1189+
if (old_prog)
1190+
bpf_prog_destroy(old_prog);
1191+
1192+
return 0;
1193+
}
1194+
1195+
static
1196+
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
11811197
{
11821198
unsigned int fsize = bpf_classic_proglen(fprog);
11831199
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
11841200
struct bpf_prog *prog;
11851201
int err;
11861202

11871203
if (sock_flag(sk, SOCK_FILTER_LOCKED))
1188-
return -EPERM;
1204+
return ERR_PTR(-EPERM);
11891205

11901206
/* Make sure new filter is there and in the right amounts. */
11911207
if (fprog->filter == NULL)
1192-
return -EINVAL;
1208+
return ERR_PTR(-EINVAL);
11931209

11941210
prog = bpf_prog_alloc(bpf_fsize, 0);
11951211
if (!prog)
1196-
return -ENOMEM;
1212+
return ERR_PTR(-ENOMEM);
11971213

11981214
if (copy_from_user(prog->insns, fprog->filter, fsize)) {
11991215
__bpf_prog_free(prog);
1200-
return -EFAULT;
1216+
return ERR_PTR(-EFAULT);
12011217
}
12021218

12031219
prog->len = fprog->len;
12041220

12051221
err = bpf_prog_store_orig_filter(prog, fprog);
12061222
if (err) {
12071223
__bpf_prog_free(prog);
1208-
return -ENOMEM;
1224+
return ERR_PTR(-ENOMEM);
12091225
}
12101226

12111227
/* bpf_prepare_filter() already takes care of freeing
12121228
* memory in case something goes wrong.
12131229
*/
1214-
prog = bpf_prepare_filter(prog, NULL);
1230+
return bpf_prepare_filter(prog, NULL);
1231+
}
1232+
1233+
/**
1234+
* sk_attach_filter - attach a socket filter
1235+
* @fprog: the filter program
1236+
* @sk: the socket to use
1237+
*
1238+
* Attach the user's filter code. We first run some sanity checks on
1239+
* it to make sure it does not explode on us later. If an error
1240+
* occurs or there is insufficient memory for the filter a negative
1241+
* errno code is returned. On success the return is zero.
1242+
*/
1243+
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1244+
{
1245+
struct bpf_prog *prog = __get_filter(fprog, sk);
1246+
int err;
1247+
12151248
if (IS_ERR(prog))
12161249
return PTR_ERR(prog);
12171250

@@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
12251258
}
12261259
EXPORT_SYMBOL_GPL(sk_attach_filter);
12271260

1228-
int sk_attach_bpf(u32 ufd, struct sock *sk)
1261+
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
12291262
{
1230-
struct bpf_prog *prog;
1263+
struct bpf_prog *prog = __get_filter(fprog, sk);
12311264
int err;
12321265

1266+
if (IS_ERR(prog))
1267+
return PTR_ERR(prog);
1268+
1269+
err = __reuseport_attach_prog(prog, sk);
1270+
if (err < 0) {
1271+
__bpf_prog_release(prog);
1272+
return err;
1273+
}
1274+
1275+
return 0;
1276+
}
1277+
1278+
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1279+
{
1280+
struct bpf_prog *prog;
1281+
12331282
if (sock_flag(sk, SOCK_FILTER_LOCKED))
1234-
return -EPERM;
1283+
return ERR_PTR(-EPERM);
12351284

12361285
prog = bpf_prog_get(ufd);
12371286
if (IS_ERR(prog))
1238-
return PTR_ERR(prog);
1287+
return prog;
12391288

12401289
if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
12411290
bpf_prog_put(prog);
1242-
return -EINVAL;
1291+
return ERR_PTR(-EINVAL);
12431292
}
12441293

1294+
return prog;
1295+
}
1296+
1297+
int sk_attach_bpf(u32 ufd, struct sock *sk)
1298+
{
1299+
struct bpf_prog *prog = __get_bpf(ufd, sk);
1300+
int err;
1301+
1302+
if (IS_ERR(prog))
1303+
return PTR_ERR(prog);
1304+
12451305
err = __sk_attach_prog(prog, sk);
12461306
if (err < 0) {
12471307
bpf_prog_put(prog);
@@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
12511311
return 0;
12521312
}
12531313

1314+
/*
 * Look up the program behind @ufd with __get_bpf() and pass it to
 * __reuseport_attach_prog() for @sk.
 *
 * Returns the PTR_ERR() of a failed lookup, the negative value reported by
 * __reuseport_attach_prog() (after dropping the program with
 * bpf_prog_put()), or 0.
 */
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog;
	int status;

	prog = __get_bpf(ufd, sk);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	status = __reuseport_attach_prog(prog, sk);
	if (status >= 0)
		return 0;

	/* Attach failed: give back the reference obtained by the lookup. */
	bpf_prog_put(prog);
	return status;
}
1330+
12541331
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
12551332
#define BPF_LDST_LEN 16U
12561333

0 commit comments

Comments
 (0)