Skip to content

Commit 2cf69d3

Browse files
author
Alexei Starovoitov
committed
Merge branch 'cgroup-helpers'
Daniel Borkmann says: ==================== This adds various straightforward helper improvements and additions to BPF cgroup based connect(), sendmsg(), recvmsg() and bind-related hooks which would allow implementing more fine-grained policies and addressing the current load balancer limitations we're seeing. For details, please see the individual patches. I've tested them on Kubernetes & Cilium and also added selftests for the small verifier extension. Thanks! ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents f54a5bb + 23599ad commit 2cf69d3

File tree

11 files changed

+336
-14
lines changed

11 files changed

+336
-14
lines changed

include/linux/bpf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ enum bpf_arg_type {
233233
ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
234234

235235
ARG_PTR_TO_CTX, /* pointer to context */
236+
ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */
236237
ARG_ANYTHING, /* any (initialized) argument is ok */
237238
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
238239
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
@@ -1500,6 +1501,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
15001501
extern const struct bpf_func_proto bpf_sock_map_update_proto;
15011502
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
15021503
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
1504+
extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
15031505
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
15041506
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
15051507
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;

include/net/cls_cgroup.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,14 @@ static inline void sock_update_classid(struct sock_cgroup_data *skcd)
4545
sock_cgroup_set_classid(skcd, classid);
4646
}
4747

48+
static inline u32 __task_get_classid(struct task_struct *task)
49+
{
50+
return task_cls_state(task)->classid;
51+
}
52+
4853
static inline u32 task_get_classid(const struct sk_buff *skb)
4954
{
50-
u32 classid = task_cls_state(current)->classid;
55+
u32 classid = __task_get_classid(current);
5156

5257
/* Due to the nature of the classifier it is required to ignore all
5358
* packets originating from softirq context as accessing `current'

include/net/net_namespace.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ struct net {
168168
#ifdef CONFIG_XFRM
169169
struct netns_xfrm xfrm;
170170
#endif
171+
172+
atomic64_t net_cookie; /* written once */
173+
171174
#if IS_ENABLED(CONFIG_IP_VS)
172175
struct netns_ipvs *ipvs;
173176
#endif
@@ -273,6 +276,8 @@ static inline int check_net(const struct net *net)
273276

274277
void net_drop_ns(void *);
275278

279+
u64 net_gen_cookie(struct net *net);
280+
276281
#else
277282

278283
static inline struct net *get_net(struct net *net)
@@ -300,6 +305,11 @@ static inline int check_net(const struct net *net)
300305
return 1;
301306
}
302307

308+
static inline u64 net_gen_cookie(struct net *net)
309+
{
310+
return 0;
311+
}
312+
303313
#define net_drop_ns NULL
304314
#endif
305315

include/uapi/linux/bpf.h

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2950,6 +2950,37 @@ union bpf_attr {
29502950
* restricted to raw_tracepoint bpf programs.
29512951
* Return
29522952
* 0 on success, or a negative error in case of failure.
2953+
*
2954+
* u64 bpf_get_netns_cookie(void *ctx)
2955+
* Description
2956+
* Retrieve the cookie (generated by the kernel) of the network
2957+
* namespace the input *ctx* is associated with. The network
2958+
* namespace cookie remains stable for its lifetime and provides
2959+
* a global identifier that can be assumed unique. If *ctx* is
2960+
* NULL, then the helper returns the cookie for the initial
2961+
* network namespace. The cookie itself is very similar to that
2962+
* of bpf_get_socket_cookie() helper, but for network namespaces
2963+
* instead of sockets.
2964+
* Return
2965+
* An 8-byte long opaque number.
2966+
*
2967+
* u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
2968+
* Description
2969+
* Return id of cgroup v2 that is ancestor of the cgroup associated
2970+
* with the current task at the *ancestor_level*. The root cgroup
2971+
* is at *ancestor_level* zero and each step down the hierarchy
2972+
* increments the level. If *ancestor_level* == level of cgroup
2973+
* associated with the current task, then return value will be the
2974+
* same as that of **bpf_get_current_cgroup_id**\ ().
2975+
*
2976+
* The helper is useful to implement policies based on cgroups
2977+
* that are upper in hierarchy than immediate cgroup associated
2978+
* with the current task.
2979+
*
2980+
* The format of returned id and helper limitations are same as in
2981+
* **bpf_get_current_cgroup_id**\ ().
2982+
* Return
2983+
* The id is returned or 0 in case the id could not be retrieved.
29532984
*/
29542985
#define __BPF_FUNC_MAPPER(FN) \
29552986
FN(unspec), \
@@ -3073,7 +3104,9 @@ union bpf_attr {
30733104
FN(jiffies64), \
30743105
FN(read_branch_records), \
30753106
FN(get_ns_current_pid_tgid), \
3076-
FN(xdp_output),
3107+
FN(xdp_output), \
3108+
FN(get_netns_cookie), \
3109+
FN(get_current_ancestor_cgroup_id),
30773110

30783111
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
30793112
* function eBPF program intends to call

kernel/bpf/core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,6 +2156,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
21562156
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
21572157
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
21582158
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
2159+
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
21592160
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
21602161
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
21612162

kernel/bpf/helpers.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
340340
.ret_type = RET_INTEGER,
341341
};
342342

343+
BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
344+
{
345+
struct cgroup *cgrp = task_dfl_cgroup(current);
346+
struct cgroup *ancestor;
347+
348+
ancestor = cgroup_ancestor(cgrp, ancestor_level);
349+
if (!ancestor)
350+
return 0;
351+
return cgroup_id(ancestor);
352+
}
353+
354+
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
355+
.func = bpf_get_current_ancestor_cgroup_id,
356+
.gpl_only = false,
357+
.ret_type = RET_INTEGER,
358+
.arg1_type = ARG_ANYTHING,
359+
};
360+
343361
#ifdef CONFIG_CGROUP_BPF
344362
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
345363
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

kernel/bpf/verifier.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3461,13 +3461,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
34613461
expected_type = CONST_PTR_TO_MAP;
34623462
if (type != expected_type)
34633463
goto err_type;
3464-
} else if (arg_type == ARG_PTR_TO_CTX) {
3464+
} else if (arg_type == ARG_PTR_TO_CTX ||
3465+
arg_type == ARG_PTR_TO_CTX_OR_NULL) {
34653466
expected_type = PTR_TO_CTX;
3466-
if (type != expected_type)
3467-
goto err_type;
3468-
err = check_ctx_reg(env, reg, regno);
3469-
if (err < 0)
3470-
return err;
3467+
if (!(register_is_null(reg) &&
3468+
arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
3469+
if (type != expected_type)
3470+
goto err_type;
3471+
err = check_ctx_reg(env, reg, regno);
3472+
if (err < 0)
3473+
return err;
3474+
}
34713475
} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
34723476
expected_type = PTR_TO_SOCK_COMMON;
34733477
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */

net/core/filter.c

Lines changed: 101 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2642,6 +2642,19 @@ static const struct bpf_func_proto bpf_msg_pop_data_proto = {
26422642
.arg4_type = ARG_ANYTHING,
26432643
};
26442644

2645+
#ifdef CONFIG_CGROUP_NET_CLASSID
2646+
BPF_CALL_0(bpf_get_cgroup_classid_curr)
2647+
{
2648+
return __task_get_classid(current);
2649+
}
2650+
2651+
static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
2652+
.func = bpf_get_cgroup_classid_curr,
2653+
.gpl_only = false,
2654+
.ret_type = RET_INTEGER,
2655+
};
2656+
#endif
2657+
26452658
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
26462659
{
26472660
return task_get_classid(skb);
@@ -4117,6 +4130,18 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
41174130
.arg1_type = ARG_PTR_TO_CTX,
41184131
};
41194132

4133+
BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
4134+
{
4135+
return sock_gen_cookie(ctx);
4136+
}
4137+
4138+
static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
4139+
.func = bpf_get_socket_cookie_sock,
4140+
.gpl_only = false,
4141+
.ret_type = RET_INTEGER,
4142+
.arg1_type = ARG_PTR_TO_CTX,
4143+
};
4144+
41204145
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
41214146
{
41224147
return sock_gen_cookie(ctx->sk);
@@ -4129,6 +4154,39 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
41294154
.arg1_type = ARG_PTR_TO_CTX,
41304155
};
41314156

4157+
static u64 __bpf_get_netns_cookie(struct sock *sk)
4158+
{
4159+
#ifdef CONFIG_NET_NS
4160+
return net_gen_cookie(sk ? sk->sk_net.net : &init_net);
4161+
#else
4162+
return 0;
4163+
#endif
4164+
}
4165+
4166+
BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
4167+
{
4168+
return __bpf_get_netns_cookie(ctx);
4169+
}
4170+
4171+
static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
4172+
.func = bpf_get_netns_cookie_sock,
4173+
.gpl_only = false,
4174+
.ret_type = RET_INTEGER,
4175+
.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4176+
};
4177+
4178+
BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4179+
{
4180+
return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4181+
}
4182+
4183+
static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
4184+
.func = bpf_get_netns_cookie_sock_addr,
4185+
.gpl_only = false,
4186+
.ret_type = RET_INTEGER,
4187+
.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4188+
};
4189+
41324190
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
41334191
{
41344192
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4147,17 +4205,17 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
41474205
.arg1_type = ARG_PTR_TO_CTX,
41484206
};
41494207

4150-
BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
4151-
struct bpf_map *, map, u64, flags, void *, data, u64, size)
4208+
BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags,
4209+
void *, data, u64, size)
41524210
{
41534211
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
41544212
return -EINVAL;
41554213

41564214
return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
41574215
}
41584216

4159-
static const struct bpf_func_proto bpf_sockopt_event_output_proto = {
4160-
.func = bpf_sockopt_event_output,
4217+
static const struct bpf_func_proto bpf_event_output_data_proto = {
4218+
.func = bpf_event_output_data,
41614219
.gpl_only = true,
41624220
.ret_type = RET_INTEGER,
41634221
.arg1_type = ARG_PTR_TO_CTX,
@@ -5954,6 +6012,26 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
59546012
return &bpf_get_current_uid_gid_proto;
59556013
case BPF_FUNC_get_local_storage:
59566014
return &bpf_get_local_storage_proto;
6015+
case BPF_FUNC_get_socket_cookie:
6016+
return &bpf_get_socket_cookie_sock_proto;
6017+
case BPF_FUNC_get_netns_cookie:
6018+
return &bpf_get_netns_cookie_sock_proto;
6019+
case BPF_FUNC_perf_event_output:
6020+
return &bpf_event_output_data_proto;
6021+
case BPF_FUNC_get_current_pid_tgid:
6022+
return &bpf_get_current_pid_tgid_proto;
6023+
case BPF_FUNC_get_current_comm:
6024+
return &bpf_get_current_comm_proto;
6025+
#ifdef CONFIG_CGROUPS
6026+
case BPF_FUNC_get_current_cgroup_id:
6027+
return &bpf_get_current_cgroup_id_proto;
6028+
case BPF_FUNC_get_current_ancestor_cgroup_id:
6029+
return &bpf_get_current_ancestor_cgroup_id_proto;
6030+
#endif
6031+
#ifdef CONFIG_CGROUP_NET_CLASSID
6032+
case BPF_FUNC_get_cgroup_classid:
6033+
return &bpf_get_cgroup_classid_curr_proto;
6034+
#endif
59576035
default:
59586036
return bpf_base_func_proto(func_id);
59596037
}
@@ -5978,8 +6056,26 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
59786056
}
59796057
case BPF_FUNC_get_socket_cookie:
59806058
return &bpf_get_socket_cookie_sock_addr_proto;
6059+
case BPF_FUNC_get_netns_cookie:
6060+
return &bpf_get_netns_cookie_sock_addr_proto;
59816061
case BPF_FUNC_get_local_storage:
59826062
return &bpf_get_local_storage_proto;
6063+
case BPF_FUNC_perf_event_output:
6064+
return &bpf_event_output_data_proto;
6065+
case BPF_FUNC_get_current_pid_tgid:
6066+
return &bpf_get_current_pid_tgid_proto;
6067+
case BPF_FUNC_get_current_comm:
6068+
return &bpf_get_current_comm_proto;
6069+
#ifdef CONFIG_CGROUPS
6070+
case BPF_FUNC_get_current_cgroup_id:
6071+
return &bpf_get_current_cgroup_id_proto;
6072+
case BPF_FUNC_get_current_ancestor_cgroup_id:
6073+
return &bpf_get_current_ancestor_cgroup_id_proto;
6074+
#endif
6075+
#ifdef CONFIG_CGROUP_NET_CLASSID
6076+
case BPF_FUNC_get_cgroup_classid:
6077+
return &bpf_get_cgroup_classid_curr_proto;
6078+
#endif
59836079
#ifdef CONFIG_INET
59846080
case BPF_FUNC_sk_lookup_tcp:
59856081
return &bpf_sock_addr_sk_lookup_tcp_proto;
@@ -6222,7 +6318,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
62226318
case BPF_FUNC_get_local_storage:
62236319
return &bpf_get_local_storage_proto;
62246320
case BPF_FUNC_perf_event_output:
6225-
return &bpf_sockopt_event_output_proto;
6321+
return &bpf_event_output_data_proto;
62266322
case BPF_FUNC_sk_storage_get:
62276323
return &bpf_sk_storage_get_proto;
62286324
case BPF_FUNC_sk_storage_delete:

net/core/net_namespace.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,20 @@ EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
6969

7070
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
7171

72+
static atomic64_t cookie_gen;
73+
74+
u64 net_gen_cookie(struct net *net)
75+
{
76+
while (1) {
77+
u64 res = atomic64_read(&net->net_cookie);
78+
79+
if (res)
80+
return res;
81+
res = atomic64_inc_return(&cookie_gen);
82+
atomic64_cmpxchg(&net->net_cookie, 0, res);
83+
}
84+
}
85+
7286
static struct net_generic *net_alloc_generic(void)
7387
{
7488
struct net_generic *ng;
@@ -1087,6 +1101,7 @@ static int __init net_ns_init(void)
10871101
panic("Could not allocate generic netns");
10881102

10891103
rcu_assign_pointer(init_net.gen, ng);
1104+
net_gen_cookie(&init_net);
10901105

10911106
down_write(&pernet_ops_rwsem);
10921107
if (setup_net(&init_net, &init_user_ns))

0 commit comments

Comments
 (0)