Skip to content

Commit 324bda9

Browse files
4ast authored and davem330 committed
bpf: multi program support for cgroup+bpf
Introduce the BPF_F_ALLOW_MULTI flag, which can be used to attach multiple bpf programs to a cgroup.

The difference between the three possible flags for the BPF_PROG_ATTACH command:
- NONE (default): No further bpf programs allowed in the subtree.
- BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to the sub-cgroup program.
- BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup.

NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to the new flag.

Only one program is allowed to be attached to a cgroup with the NONE or BPF_F_ALLOW_OVERRIDE flag. Multiple programs are allowed to be attached to a cgroup with the BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first). The programs of the sub-cgroup are executed first, then the programs of this cgroup, and then the programs of the parent cgroup. All eligible programs are executed regardless of the return code from earlier programs.

To allow efficient execution of multiple programs attached to a cgroup, and to avoid penalizing cgroups without any programs attached, introduce 'struct bpf_prog_array', which is an RCU-protected array of pointers to bpf programs.

Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: Daniel Borkmann <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
for cgroup bits
Acked-by: Tejun Heo <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent c818fa9 commit 324bda9

File tree

8 files changed

+516
-169
lines changed

8 files changed

+516
-169
lines changed

include/linux/bpf-cgroup.h

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,42 @@ struct bpf_sock_ops_kern;
1414
extern struct static_key_false cgroup_bpf_enabled_key;
1515
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
1616

17+
struct bpf_prog_list {
18+
struct list_head node;
19+
struct bpf_prog *prog;
20+
};
21+
22+
struct bpf_prog_array;
23+
1724
struct cgroup_bpf {
18-
/*
19-
* Store two sets of bpf_prog pointers, one for programs that are
20-
* pinned directly to this cgroup, and one for those that are effective
21-
* when this cgroup is accessed.
25+
/* array of effective progs in this cgroup */
26+
struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
27+
28+
/* progs attached to this cgroup and their attach flags
29+
* when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
30+
* have either zero or one element
31+
* when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
2232
*/
23-
struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
24-
struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
25-
bool disallow_override[MAX_BPF_ATTACH_TYPE];
33+
struct list_head progs[MAX_BPF_ATTACH_TYPE];
34+
u32 flags[MAX_BPF_ATTACH_TYPE];
35+
36+
/* temp storage for effective prog array used by prog_attach/detach */
37+
struct bpf_prog_array __rcu *inactive;
2638
};
2739

2840
void cgroup_bpf_put(struct cgroup *cgrp);
29-
void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
41+
int cgroup_bpf_inherit(struct cgroup *cgrp);
3042

31-
int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
32-
struct bpf_prog *prog, enum bpf_attach_type type,
33-
bool overridable);
43+
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
44+
enum bpf_attach_type type, u32 flags);
45+
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
46+
enum bpf_attach_type type, u32 flags);
3447

35-
/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
36-
int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
37-
enum bpf_attach_type type, bool overridable);
48+
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
49+
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
50+
enum bpf_attach_type type, u32 flags);
51+
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
52+
enum bpf_attach_type type, u32 flags);
3853

3954
int __cgroup_bpf_run_filter_skb(struct sock *sk,
4055
struct sk_buff *skb,
@@ -96,8 +111,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
96111

97112
struct cgroup_bpf {};
98113
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
99-
static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
100-
struct cgroup *parent) {}
114+
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
101115

102116
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
103117
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })

include/linux/bpf.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,38 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
241241
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
242242
union bpf_attr __user *uattr);
243243

244+
/* an array of programs to be executed under rcu_lock.
245+
*
246+
* Typical usage:
247+
* ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
248+
*
249+
* the structure returned by bpf_prog_array_alloc() should be populated
250+
* with program pointers and the last pointer must be NULL.
251+
* The user has to keep refcnt on the program and make sure the program
252+
* is removed from the array before bpf_prog_put().
253+
* The 'struct bpf_prog_array *' should only be replaced with xchg()
254+
* since other cpus are walking the array of pointers in parallel.
255+
*/
256+
struct bpf_prog_array {
257+
struct rcu_head rcu;
258+
struct bpf_prog *progs[0];
259+
};
260+
261+
struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
262+
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
263+
264+
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
265+
({ \
266+
struct bpf_prog **_prog; \
267+
u32 _ret = 1; \
268+
rcu_read_lock(); \
269+
_prog = rcu_dereference(array)->progs; \
270+
for (; *_prog; _prog++) \
271+
_ret &= func(*_prog, ctx); \
272+
rcu_read_unlock(); \
273+
_ret; \
274+
})
275+
244276
#ifdef CONFIG_BPF_SYSCALL
245277
DECLARE_PER_CPU(int, bpf_prog_active);
246278

include/linux/filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ struct sk_filter {
481481
struct bpf_prog *prog;
482482
};
483483

484-
#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
484+
#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
485485

486486
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
487487

include/uapi/linux/bpf.h

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,47 @@ enum bpf_attach_type {
143143

144144
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
145145

146-
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
147-
* to the given target_fd cgroup the descendent cgroup will be able to
148-
* override effective bpf program that was inherited from this cgroup
146+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
147+
*
148+
* NONE(default): No further bpf programs allowed in the subtree.
149+
*
150+
* BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
151+
* the program in this cgroup yields to sub-cgroup program.
152+
*
153+
* BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
154+
* that cgroup program gets run in addition to the program in this cgroup.
155+
*
156+
* Only one program is allowed to be attached to a cgroup with
157+
* NONE or BPF_F_ALLOW_OVERRIDE flag.
158+
* Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
159+
* release old program and attach the new one. Attach flags have to match.
160+
*
161+
* Multiple programs are allowed to be attached to a cgroup with
162+
* BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
163+
* (those that were attached first, run first)
164+
* The programs of sub-cgroup are executed first, then programs of
165+
* this cgroup and then programs of parent cgroup.
166+
* When a child program makes a decision (like picking TCP CA or sock bind)
167+
* parent program has a chance to override it.
168+
*
169+
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
170+
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
171+
* Ex1:
172+
* cgrp1 (MULTI progs A, B) ->
173+
* cgrp2 (OVERRIDE prog C) ->
174+
* cgrp3 (MULTI prog D) ->
175+
* cgrp4 (OVERRIDE prog E) ->
176+
* cgrp5 (NONE prog F)
177+
* the event in cgrp5 triggers execution of F,D,A,B in that order.
178+
* if prog F is detached, the execution is E,D,A,B
179+
* if prog F and D are detached, the execution is E,A,B
180+
* if prog F, E and D are detached, the execution is C,A,B
181+
*
182+
* All eligible programs are executed regardless of return code from
183+
* earlier programs.
149184
*/
150185
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
186+
#define BPF_F_ALLOW_MULTI (1U << 1)
151187

152188
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
153189
* verifier will perform strict alignment checking as if the kernel

0 commit comments

Comments
 (0)