Skip to content

Commit 6102365

Browse files
David Ahern authored and davem330 committed
bpf: Add new cgroup attach type to enable sock modifications
Add a new cgroup-based program type, BPF_PROG_TYPE_CGROUP_SOCK. Similar to BPF_PROG_TYPE_CGROUP_SKB, these programs can be attached to a cgroup and are run any time a process in the cgroup opens an AF_INET or AF_INET6 socket. Currently, only sk_bound_dev_if is exported to userspace for modification by a bpf program. This allows a cgroup to be configured such that AF_INET{6} sockets opened by processes are automatically bound to a specific device. In turn, this enables the running of programs that do not support SO_BINDTODEVICE in a specific VRF context / L3 domain. Signed-off-by: David Ahern <[email protected]> Acked-by: Alexei Starovoitov <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent b2cd125 commit 6102365

File tree

7 files changed

+138
-2
lines changed

7 files changed

+138
-2
lines changed

include/linux/bpf-cgroup.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
4040
struct sk_buff *skb,
4141
enum bpf_attach_type type);
4242

43+
int __cgroup_bpf_run_filter_sk(struct sock *sk,
44+
enum bpf_attach_type type);
45+
4346
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
4447
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
4548
({ \
@@ -63,6 +66,16 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
6366
__ret; \
6467
})
6568

69+
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
70+
({ \
71+
int __ret = 0; \
72+
if (cgroup_bpf_enabled && sk) { \
73+
__ret = __cgroup_bpf_run_filter_sk(sk, \
74+
BPF_CGROUP_INET_SOCK_CREATE); \
75+
} \
76+
__ret; \
77+
})
78+
6679
#else
6780

6881
struct cgroup_bpf {};
@@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
7285

7386
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
7487
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
88+
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
7589

7690
#endif /* CONFIG_CGROUP_BPF */
7791

include/uapi/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ enum bpf_prog_type {
101101
BPF_PROG_TYPE_XDP,
102102
BPF_PROG_TYPE_PERF_EVENT,
103103
BPF_PROG_TYPE_CGROUP_SKB,
104+
BPF_PROG_TYPE_CGROUP_SOCK,
104105
BPF_PROG_TYPE_LWT_IN,
105106
BPF_PROG_TYPE_LWT_OUT,
106107
BPF_PROG_TYPE_LWT_XMIT,
@@ -109,6 +110,7 @@ enum bpf_prog_type {
109110
enum bpf_attach_type {
110111
BPF_CGROUP_INET_INGRESS,
111112
BPF_CGROUP_INET_EGRESS,
113+
BPF_CGROUP_INET_SOCK_CREATE,
112114
__MAX_BPF_ATTACH_TYPE
113115
};
114116

@@ -567,6 +569,10 @@ enum bpf_ret_code {
567569
/* >127 are reserved for prog type specific return codes */
568570
};
569571

572+
struct bpf_sock {
573+
__u32 bound_dev_if;
574+
};
575+
570576
/* User return codes for XDP prog type.
571577
* A valid XDP program must return one of these defined values. All other
572578
* return codes are reserved for future use. Unknown return codes will result

kernel/bpf/cgroup.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,36 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
165165
return ret;
166166
}
167167
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
168+
169+
/**
170+
* __cgroup_bpf_run_filter_sk() - Run a program on a sock
171+
* @sk: sock structure to manipulate
172+
* @type: The type of program to be executed
173+
*
174+
* The socket passed in is expected to be of type INET or INET6.
175+
*
176+
* The program type passed in via @type must be suitable for sock
177+
* filtering. No further check is performed to assert that.
178+
*
179+
* This function will return %-EPERM if an attached program was found
180+
* and if it returned != 1 during execution. In all other cases, 0 is returned.
181+
*/
182+
int __cgroup_bpf_run_filter_sk(struct sock *sk,
183+
enum bpf_attach_type type)
184+
{
185+
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
186+
struct bpf_prog *prog;
187+
int ret = 0;
188+
189+
190+
rcu_read_lock();
191+
192+
prog = rcu_dereference(cgrp->bpf.effective[type]);
193+
if (prog)
194+
ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
195+
196+
rcu_read_unlock();
197+
198+
return ret;
199+
}
200+
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);

kernel/bpf/syscall.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
869869
case BPF_CGROUP_INET_EGRESS:
870870
ptype = BPF_PROG_TYPE_CGROUP_SKB;
871871
break;
872-
872+
case BPF_CGROUP_INET_SOCK_CREATE:
873+
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
874+
break;
873875
default:
874876
return -EINVAL;
875877
}
@@ -905,6 +907,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
905907
switch (attr->attach_type) {
906908
case BPF_CGROUP_INET_INGRESS:
907909
case BPF_CGROUP_INET_EGRESS:
910+
case BPF_CGROUP_INET_SOCK_CREATE:
908911
cgrp = cgroup_get_from_fd(attr->target_fd);
909912
if (IS_ERR(cgrp))
910913
return PTR_ERR(cgrp);

net/core/filter.c

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2818,6 +2818,32 @@ static bool lwt_is_valid_access(int off, int size,
28182818
return __is_valid_access(off, size, type);
28192819
}
28202820

2821+
static bool sock_filter_is_valid_access(int off, int size,
2822+
enum bpf_access_type type,
2823+
enum bpf_reg_type *reg_type)
2824+
{
2825+
if (type == BPF_WRITE) {
2826+
switch (off) {
2827+
case offsetof(struct bpf_sock, bound_dev_if):
2828+
break;
2829+
default:
2830+
return false;
2831+
}
2832+
}
2833+
2834+
if (off < 0 || off + size > sizeof(struct bpf_sock))
2835+
return false;
2836+
2837+
/* The verifier guarantees that size > 0. */
2838+
if (off % size != 0)
2839+
return false;
2840+
2841+
if (size != sizeof(__u32))
2842+
return false;
2843+
2844+
return true;
2845+
}
2846+
28212847
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
28222848
const struct bpf_prog *prog)
28232849
{
@@ -3076,6 +3102,30 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
30763102
return insn - insn_buf;
30773103
}
30783104

3105+
static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
3106+
int dst_reg, int src_reg,
3107+
int ctx_off,
3108+
struct bpf_insn *insn_buf,
3109+
struct bpf_prog *prog)
3110+
{
3111+
struct bpf_insn *insn = insn_buf;
3112+
3113+
switch (ctx_off) {
3114+
case offsetof(struct bpf_sock, bound_dev_if):
3115+
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
3116+
3117+
if (type == BPF_WRITE)
3118+
*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
3119+
offsetof(struct sock, sk_bound_dev_if));
3120+
else
3121+
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
3122+
offsetof(struct sock, sk_bound_dev_if));
3123+
break;
3124+
}
3125+
3126+
return insn - insn_buf;
3127+
}
3128+
30793129
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
30803130
int src_reg, int ctx_off,
30813131
struct bpf_insn *insn_buf,
@@ -3162,6 +3212,12 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
31623212
.gen_prologue = tc_cls_act_prologue,
31633213
};
31643214

3215+
static const struct bpf_verifier_ops cg_sock_ops = {
3216+
.get_func_proto = sk_filter_func_proto,
3217+
.is_valid_access = sock_filter_is_valid_access,
3218+
.convert_ctx_access = sock_filter_convert_ctx_access,
3219+
};
3220+
31653221
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
31663222
.ops = &sk_filter_ops,
31673223
.type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3202,13 +3258,19 @@ static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
32023258
.type = BPF_PROG_TYPE_LWT_XMIT,
32033259
};
32043260

3261+
static struct bpf_prog_type_list cg_sock_type __read_mostly = {
3262+
.ops = &cg_sock_ops,
3263+
.type = BPF_PROG_TYPE_CGROUP_SOCK
3264+
};
3265+
32053266
static int __init register_sk_filter_ops(void)
32063267
{
32073268
bpf_register_prog_type(&sk_filter_type);
32083269
bpf_register_prog_type(&sched_cls_type);
32093270
bpf_register_prog_type(&sched_act_type);
32103271
bpf_register_prog_type(&xdp_type);
32113272
bpf_register_prog_type(&cg_skb_type);
3273+
bpf_register_prog_type(&cg_sock_type);
32123274
bpf_register_prog_type(&lwt_in_type);
32133275
bpf_register_prog_type(&lwt_out_type);
32143276
bpf_register_prog_type(&lwt_xmit_type);

net/ipv4/af_inet.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,18 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
374374

375375
if (sk->sk_prot->init) {
376376
err = sk->sk_prot->init(sk);
377-
if (err)
377+
if (err) {
378+
sk_common_release(sk);
379+
goto out;
380+
}
381+
}
382+
383+
if (!kern) {
384+
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
385+
if (err) {
378386
sk_common_release(sk);
387+
goto out;
388+
}
379389
}
380390
out:
381391
return err;

net/ipv6/af_inet6.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,14 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
258258
goto out;
259259
}
260260
}
261+
262+
if (!kern) {
263+
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
264+
if (err) {
265+
sk_common_release(sk);
266+
goto out;
267+
}
268+
}
261269
out:
262270
return err;
263271
out_rcu_unlock:

0 commit comments

Comments
 (0)