Skip to content

Commit 7ecc66b

Browse files
4ast authored and Somasundaram Krishnasamy committed
bpf: multi program support for cgroup+bpf
introduce BPF_F_ALLOW_MULTI flag that can be used to attach multiple bpf programs to a cgroup.

The difference between three possible flags for BPF_PROG_ATTACH command:
- NONE(default): No further bpf programs allowed in the subtree.
- BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program.
- BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup.

NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to new flag.

Only one program is allowed to be attached to a cgroup with NONE or BPF_F_ALLOW_OVERRIDE flag.
Multiple programs are allowed to be attached to a cgroup with BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first)
The programs of sub-cgroup are executed first, then programs of this cgroup and then programs of parent cgroup.
All eligible programs are executed regardless of return code from earlier programs.

To allow efficient execution of multiple programs attached to a cgroup and to avoid penalizing cgroups without any programs attached introduce 'struct bpf_prog_array' which is RCU protected array of pointers to bpf programs.

Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: Daniel Borkmann <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
for cgroup bits
Acked-by: Tejun Heo <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
(cherry picked from commit 324bda9)

Orabug: 31667601

Signed-off-by: Alan Maguire <[email protected]>
Reviewed-by: Mark Haywood <[email protected]>

Conflicts:
kernel/cgroup/cgroup.c
include/linux/bpf-cgroup.h

context differences around out_* labels in cgroup_create() in cgroup.c

KABI replacement/extensions to "struct cgroup_bpf" which involved reordering the fields such that the order matches previous version of the struct; progs and effective fields are reordered wrt upstream to match previous order and flags field replaces the bool disallow_override field; finally the inactive progs array is appended.

Signed-off-by: Somasundaram Krishnasamy <[email protected]>
1 parent 83e85d1 commit 7ecc66b

File tree

8 files changed

+515
-169
lines changed

8 files changed

+515
-169
lines changed

include/linux/bpf-cgroup.h

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,43 @@ struct bpf_sock_ops_kern;
1515
extern struct static_key_false cgroup_bpf_enabled_key;
1616
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
1717

18+
struct bpf_prog_list {
19+
struct list_head node;
20+
struct bpf_prog *prog;
21+
};
22+
23+
struct bpf_prog_array;
24+
1825
struct cgroup_bpf {
19-
/*
20-
* Store two sets of bpf_prog pointers, one for programs that are
21-
* pinned directly to this cgroup, and one for those that are effective
22-
* when this cgroup is accessed.
26+
/* attached progs to this cgroup and attach flags
27+
* when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
28+
* have either zero or one element
29+
* when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
2330
*/
24-
struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
25-
struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
26-
bool disallow_override[MAX_BPF_ATTACH_TYPE];
31+
UEK_KABI_REPLACE(struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE],
32+
struct list_head progs[MAX_BPF_ATTACH_TYPE])
33+
/* array of effective progs in this cgroup */
34+
UEK_KABI_REPLACE(struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE],
35+
struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE])
36+
UEK_KABI_REPLACE(bool disallow_override[MAX_BPF_ATTACH_TYPE],
37+
u32 flags[MAX_BPF_ATTACH_TYPE])
38+
/* temp storage for effective prog array used by prog_attach/detach */
39+
UEK_KABI_EXTEND(struct bpf_prog_array __rcu *inactive)
2740
};
2841

2942
void cgroup_bpf_put(struct cgroup *cgrp);
30-
void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
43+
int cgroup_bpf_inherit(struct cgroup *cgrp);
3144

32-
int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
33-
struct bpf_prog *prog, enum bpf_attach_type type,
34-
bool overridable);
45+
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
46+
enum bpf_attach_type type, u32 flags);
47+
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
48+
enum bpf_attach_type type, u32 flags);
3549

36-
/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
37-
int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
38-
enum bpf_attach_type type, bool overridable);
50+
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
51+
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
52+
enum bpf_attach_type type, u32 flags);
53+
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
54+
enum bpf_attach_type type, u32 flags);
3955

4056
int __cgroup_bpf_run_filter_skb(struct sock *sk,
4157
struct sk_buff *skb,
@@ -97,8 +113,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
97113

98114
struct cgroup_bpf {};
99115
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
100-
static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
101-
struct cgroup *parent) {}
116+
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
102117

103118
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
104119
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })

include/linux/bpf.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,38 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
250250
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
251251
union bpf_attr __user *uattr);
252252

253+
/* an array of programs to be executed under rcu_lock.
254+
*
255+
* Typical usage:
256+
* ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
257+
*
258+
* the structure returned by bpf_prog_array_alloc() should be populated
259+
* with program pointers and the last pointer must be NULL.
260+
* The user has to keep refcnt on the program and make sure the program
261+
* is removed from the array before bpf_prog_put().
262+
* The 'struct bpf_prog_array *' should only be replaced with xchg()
263+
* since other cpus are walking the array of pointers in parallel.
264+
*/
265+
struct bpf_prog_array {
266+
struct rcu_head rcu;
267+
struct bpf_prog *progs[0];
268+
};
269+
270+
struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
271+
void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
272+
273+
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
274+
({ \
275+
struct bpf_prog **_prog; \
276+
u32 _ret = 1; \
277+
rcu_read_lock(); \
278+
_prog = rcu_dereference(array)->progs; \
279+
for (; *_prog; _prog++) \
280+
_ret &= func(*_prog, ctx); \
281+
rcu_read_unlock(); \
282+
_ret; \
283+
})
284+
253285
#ifdef CONFIG_BPF_SYSCALL
254286
DECLARE_PER_CPU(int, bpf_prog_active);
255287

include/linux/filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ struct sk_filter {
477477
struct bpf_prog *prog;
478478
};
479479

480-
#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
480+
#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
481481

482482
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
483483

include/uapi/linux/bpf.h

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,47 @@ enum bpf_attach_type {
144144

145145
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
146146

147-
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
148-
* to the given target_fd cgroup the descendent cgroup will be able to
149-
* override effective bpf program that was inherited from this cgroup
147+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
148+
*
149+
* NONE(default): No further bpf programs allowed in the subtree.
150+
*
151+
* BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
152+
* the program in this cgroup yields to sub-cgroup program.
153+
*
154+
* BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
155+
* that cgroup program gets run in addition to the program in this cgroup.
156+
*
157+
* Only one program is allowed to be attached to a cgroup with
158+
* NONE or BPF_F_ALLOW_OVERRIDE flag.
159+
* Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
160+
* release old program and attach the new one. Attach flags have to match.
161+
*
162+
* Multiple programs are allowed to be attached to a cgroup with
163+
* BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
164+
* (those that were attached first, run first)
165+
* The programs of sub-cgroup are executed first, then programs of
166+
* this cgroup and then programs of parent cgroup.
167+
* When a child program makes a decision (like picking TCP CA or sock bind)
168+
* parent program has a chance to override it.
169+
*
170+
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
171+
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
172+
* Ex1:
173+
* cgrp1 (MULTI progs A, B) ->
174+
* cgrp2 (OVERRIDE prog C) ->
175+
* cgrp3 (MULTI prog D) ->
176+
* cgrp4 (OVERRIDE prog E) ->
177+
* cgrp5 (NONE prog F)
178+
* the event in cgrp5 triggers execution of F,D,A,B in that order.
179+
* if prog F is detached, the execution is E,D,A,B
180+
* if prog F and D are detached, the execution is E,A,B
181+
* if prog F, E and D are detached, the execution is C,A,B
182+
*
183+
* All eligible programs are executed regardless of return code from
184+
* earlier programs.
150185
*/
151186
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
187+
#define BPF_F_ALLOW_MULTI (1U << 1)
152188

153189
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
154190
* verifier will perform strict alignment checking as if the kernel

0 commit comments

Comments
 (0)