Skip to content

Commit 8217ca6

Browse files
iamkafai authored and borkmann committed
bpf: Enable BPF_PROG_TYPE_SK_REUSEPORT bpf prog in reuseport selection
This patch allows a BPF_PROG_TYPE_SK_REUSEPORT bpf prog to select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY introduced in the earlier patch. "bpf_run_sk_reuseport()" will return -ECONNREFUSED when the BPF_PROG_TYPE_SK_REUSEPORT prog returns SK_DROP. The callers, in inet[6]_hashtables.c and ipv[46]/udp.c, are modified to handle this case and return NULL immediately instead of continuing the sk search from its hashtable.

It re-uses the existing SO_ATTACH_REUSEPORT_EBPF setsockopt to attach BPF_PROG_TYPE_SK_REUSEPORT. The "sk_reuseport_attach_bpf()" will check if the attaching bpf prog is in the new SK_REUSEPORT or the existing SOCKET_FILTER type and then check different things accordingly.

One level of "__reuseport_attach_prog()" call is removed. The "sk_unhashed() && ..." and "sk->sk_reuseport_cb" tests are pushed back to "reuseport_attach_prog()" in sock_reuseport.c. sock_reuseport.c seems to have more knowledge on those test requirements than filter.c. In "reuseport_attach_prog()", after new_prog is attached to reuse->prog, the old_prog (if any) is also directly freed instead of returning the old_prog to the caller and asking the caller to free.

The sysctl_optmem_max check is moved back to the "sk_reuseport_attach_filter()" and "sk_reuseport_attach_bpf()". As with other bpf prog types, the new BPF_PROG_TYPE_SK_REUSEPORT is only bounded by the usual "bpf_prog_charge_memlock()" during load time instead of bounded by both bpf_prog_charge_memlock and sysctl_optmem_max.

Signed-off-by: Martin KaFai Lau <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 2dbb9b9 commit 8217ca6

File tree

8 files changed

+106
-57
lines changed

8 files changed

+106
-57
lines changed

include/linux/filter.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -753,6 +753,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
753753
int sk_attach_bpf(u32 ufd, struct sock *sk);
754754
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
755755
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
756+
void sk_reuseport_prog_free(struct bpf_prog *prog);
756757
int sk_detach_filter(struct sock *sk);
757758
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
758759
unsigned int len);

include/net/sock_reuseport.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,7 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
3434
u32 hash,
3535
struct sk_buff *skb,
3636
int hdr_len);
37-
extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
38-
struct bpf_prog *prog);
37+
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
3938
int reuseport_get_id(struct sock_reuseport *reuse);
4039

4140
#endif /* _SOCK_REUSEPORT_H */

net/core/filter.c

Lines changed: 52 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1453,30 +1453,6 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
14531453
return 0;
14541454
}
14551455

1456-
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1457-
{
1458-
struct bpf_prog *old_prog;
1459-
int err;
1460-
1461-
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1462-
return -ENOMEM;
1463-
1464-
if (sk_unhashed(sk) && sk->sk_reuseport) {
1465-
err = reuseport_alloc(sk, false);
1466-
if (err)
1467-
return err;
1468-
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
1469-
/* The socket wasn't bound with SO_REUSEPORT */
1470-
return -EINVAL;
1471-
}
1472-
1473-
old_prog = reuseport_attach_prog(sk, prog);
1474-
if (old_prog)
1475-
bpf_prog_destroy(old_prog);
1476-
1477-
return 0;
1478-
}
1479-
14801456
static
14811457
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
14821458
{
@@ -1550,13 +1526,15 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
15501526
if (IS_ERR(prog))
15511527
return PTR_ERR(prog);
15521528

1553-
err = __reuseport_attach_prog(prog, sk);
1554-
if (err < 0) {
1529+
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1530+
err = -ENOMEM;
1531+
else
1532+
err = reuseport_attach_prog(sk, prog);
1533+
1534+
if (err)
15551535
__bpf_prog_release(prog);
1556-
return err;
1557-
}
15581536

1559-
return 0;
1537+
return err;
15601538
}
15611539

15621540
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
@@ -1586,19 +1564,58 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
15861564

15871565
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
15881566
{
1589-
struct bpf_prog *prog = __get_bpf(ufd, sk);
1567+
struct bpf_prog *prog;
15901568
int err;
15911569

1570+
if (sock_flag(sk, SOCK_FILTER_LOCKED))
1571+
return -EPERM;
1572+
1573+
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1574+
if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
1575+
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
15921576
if (IS_ERR(prog))
15931577
return PTR_ERR(prog);
15941578

1595-
err = __reuseport_attach_prog(prog, sk);
1596-
if (err < 0) {
1597-
bpf_prog_put(prog);
1598-
return err;
1579+
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
1580+
/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
1581+
* bpf prog (e.g. sockmap). It depends on the
1582+
* limitation imposed by bpf_prog_load().
1583+
* Hence, sysctl_optmem_max is not checked.
1584+
*/
1585+
if ((sk->sk_type != SOCK_STREAM &&
1586+
sk->sk_type != SOCK_DGRAM) ||
1587+
(sk->sk_protocol != IPPROTO_UDP &&
1588+
sk->sk_protocol != IPPROTO_TCP) ||
1589+
(sk->sk_family != AF_INET &&
1590+
sk->sk_family != AF_INET6)) {
1591+
err = -ENOTSUPP;
1592+
goto err_prog_put;
1593+
}
1594+
} else {
1595+
/* BPF_PROG_TYPE_SOCKET_FILTER */
1596+
if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
1597+
err = -ENOMEM;
1598+
goto err_prog_put;
1599+
}
15991600
}
16001601

1601-
return 0;
1602+
err = reuseport_attach_prog(sk, prog);
1603+
err_prog_put:
1604+
if (err)
1605+
bpf_prog_put(prog);
1606+
1607+
return err;
1608+
}
1609+
1610+
void sk_reuseport_prog_free(struct bpf_prog *prog)
1611+
{
1612+
if (!prog)
1613+
return;
1614+
1615+
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
1616+
bpf_prog_put(prog);
1617+
else
1618+
bpf_prog_destroy(prog);
16021619
}
16031620

16041621
struct bpf_scratchpad {

net/core/sock_reuseport.c

Lines changed: 26 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
#include <net/sock_reuseport.h>
1010
#include <linux/bpf.h>
1111
#include <linux/idr.h>
12+
#include <linux/filter.h>
1213
#include <linux/rcupdate.h>
1314

1415
#define INIT_SOCKS 128
@@ -133,8 +134,7 @@ static void reuseport_free_rcu(struct rcu_head *head)
133134
struct sock_reuseport *reuse;
134135

135136
reuse = container_of(head, struct sock_reuseport, rcu);
136-
if (reuse->prog)
137-
bpf_prog_destroy(reuse->prog);
137+
sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
138138
if (reuse->reuseport_id)
139139
ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
140140
kfree(reuse);
@@ -219,9 +219,9 @@ void reuseport_detach_sock(struct sock *sk)
219219
}
220220
EXPORT_SYMBOL(reuseport_detach_sock);
221221

222-
static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
223-
struct bpf_prog *prog, struct sk_buff *skb,
224-
int hdr_len)
222+
static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
223+
struct bpf_prog *prog, struct sk_buff *skb,
224+
int hdr_len)
225225
{
226226
struct sk_buff *nskb = NULL;
227227
u32 index;
@@ -282,9 +282,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
282282
/* paired with smp_wmb() in reuseport_add_sock() */
283283
smp_rmb();
284284

285-
if (prog && skb)
286-
sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
285+
if (!prog || !skb)
286+
goto select_by_hash;
287+
288+
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
289+
sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
290+
else
291+
sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
287292

293+
select_by_hash:
288294
/* no bpf or invalid bpf result: fall back to hash usage */
289295
if (!sk2)
290296
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
@@ -296,12 +302,21 @@ struct sock *reuseport_select_sock(struct sock *sk,
296302
}
297303
EXPORT_SYMBOL(reuseport_select_sock);
298304

299-
struct bpf_prog *
300-
reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
305+
int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
301306
{
302307
struct sock_reuseport *reuse;
303308
struct bpf_prog *old_prog;
304309

310+
if (sk_unhashed(sk) && sk->sk_reuseport) {
311+
int err = reuseport_alloc(sk, false);
312+
313+
if (err)
314+
return err;
315+
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
316+
/* The socket wasn't bound with SO_REUSEPORT */
317+
return -EINVAL;
318+
}
319+
305320
spin_lock_bh(&reuseport_lock);
306321
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
307322
lockdep_is_held(&reuseport_lock));
@@ -310,6 +325,7 @@ reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
310325
rcu_assign_pointer(reuse->prog, prog);
311326
spin_unlock_bh(&reuseport_lock);
312327

313-
return old_prog;
328+
sk_reuseport_prog_free(old_prog);
329+
return 0;
314330
}
315331
EXPORT_SYMBOL(reuseport_attach_prog);

net/ipv4/inet_hashtables.c

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -328,7 +328,7 @@ struct sock *__inet_lookup_listener(struct net *net,
328328
saddr, sport, daddr, hnum,
329329
dif, sdif);
330330
if (result)
331-
return result;
331+
goto done;
332332

333333
/* Lookup lhash2 with INADDR_ANY */
334334

@@ -337,9 +337,10 @@ struct sock *__inet_lookup_listener(struct net *net,
337337
if (ilb2->count > ilb->count)
338338
goto port_lookup;
339339

340-
return inet_lhash2_lookup(net, ilb2, skb, doff,
341-
saddr, sport, daddr, hnum,
342-
dif, sdif);
340+
result = inet_lhash2_lookup(net, ilb2, skb, doff,
341+
saddr, sport, daddr, hnum,
342+
dif, sdif);
343+
goto done;
343344

344345
port_lookup:
345346
sk_for_each_rcu(sk, &ilb->head) {
@@ -352,12 +353,15 @@ struct sock *__inet_lookup_listener(struct net *net,
352353
result = reuseport_select_sock(sk, phash,
353354
skb, doff);
354355
if (result)
355-
return result;
356+
goto done;
356357
}
357358
result = sk;
358359
hiscore = score;
359360
}
360361
}
362+
done:
363+
if (unlikely(IS_ERR(result)))
364+
return NULL;
361365
return result;
362366
}
363367
EXPORT_SYMBOL_GPL(__inet_lookup_listener);

net/ipv4/udp.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -499,6 +499,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
499499
daddr, hnum, dif, sdif,
500500
exact_dif, hslot2, skb);
501501
}
502+
if (unlikely(IS_ERR(result)))
503+
return NULL;
502504
return result;
503505
}
504506
begin:
@@ -513,6 +515,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
513515
saddr, sport);
514516
result = reuseport_select_sock(sk, hash, skb,
515517
sizeof(struct udphdr));
518+
if (unlikely(IS_ERR(result)))
519+
return NULL;
516520
if (result)
517521
return result;
518522
}

net/ipv6/inet6_hashtables.c

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,7 @@ struct sock *inet6_lookup_listener(struct net *net,
191191
saddr, sport, daddr, hnum,
192192
dif, sdif);
193193
if (result)
194-
return result;
194+
goto done;
195195

196196
/* Lookup lhash2 with in6addr_any */
197197

@@ -200,9 +200,10 @@ struct sock *inet6_lookup_listener(struct net *net,
200200
if (ilb2->count > ilb->count)
201201
goto port_lookup;
202202

203-
return inet6_lhash2_lookup(net, ilb2, skb, doff,
204-
saddr, sport, daddr, hnum,
205-
dif, sdif);
203+
result = inet6_lhash2_lookup(net, ilb2, skb, doff,
204+
saddr, sport, daddr, hnum,
205+
dif, sdif);
206+
goto done;
206207

207208
port_lookup:
208209
sk_for_each(sk, &ilb->head) {
@@ -214,12 +215,15 @@ struct sock *inet6_lookup_listener(struct net *net,
214215
result = reuseport_select_sock(sk, phash,
215216
skb, doff);
216217
if (result)
217-
return result;
218+
goto done;
218219
}
219220
result = sk;
220221
hiscore = score;
221222
}
222223
}
224+
done:
225+
if (unlikely(IS_ERR(result)))
226+
return NULL;
223227
return result;
224228
}
225229
EXPORT_SYMBOL_GPL(inet6_lookup_listener);

net/ipv6/udp.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -235,6 +235,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
235235
exact_dif, hslot2,
236236
skb);
237237
}
238+
if (unlikely(IS_ERR(result)))
239+
return NULL;
238240
return result;
239241
}
240242
begin:
@@ -249,6 +251,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
249251
saddr, sport);
250252
result = reuseport_select_sock(sk, hash, skb,
251253
sizeof(struct udphdr));
254+
if (unlikely(IS_ERR(result)))
255+
return NULL;
252256
if (result)
253257
return result;
254258
}

0 commit comments

Comments
 (0)