Skip to content

Commit 2c78ee8

Browse files
Alexei Starovoitov authored and borkmann (Daniel Borkmann) committed
bpf: Implement CAP_BPF
Implement permissions as stated in uapi/linux/capability.h

In order to do that the verifier allow_ptr_leaks flag is split into four flags and they are set as:

env->allow_ptr_leaks = bpf_allow_ptr_leaks();
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();

The first three are currently equivalent to perfmon_capable(), since leaking kernel pointers and reading kernel memory via side channel attacks is roughly equivalent to reading kernel memory with cap_perfmon.

'bpf_capable' enables bounded loops, precision tracking, bpf to bpf calls and other verifier features.

'allow_ptr_leaks' enables ptr leaks, ptr conversions, subtraction of pointers.

'bypass_spec_v1' disables speculative analysis in the verifier, run time mitigations in bpf array, and enables indirect variable access in bpf programs.

'bypass_spec_v4' disables emission of sanitation code by the verifier.

That means that the networking BPF program loaded with CAP_BPF + CAP_NET_ADMIN will have speculative checks done by the verifier and other spectre mitigation applied. Such networking BPF program will not be able to leak kernel pointers and will not be able to access arbitrary kernel memory.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent a17b53c commit 2c78ee8

File tree

19 files changed

+134
-60
lines changed

19 files changed

+134
-60
lines changed

drivers/media/rc/bpf-lirc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
110110
case BPF_FUNC_get_prandom_u32:
111111
return &bpf_get_prandom_u32_proto;
112112
case BPF_FUNC_trace_printk:
113-
if (capable(CAP_SYS_ADMIN))
113+
if (perfmon_capable())
114114
return bpf_get_trace_printk_proto();
115115
/* fall through */
116116
default:

include/linux/bpf.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <linux/mutex.h>
2020
#include <linux/module.h>
2121
#include <linux/kallsyms.h>
22+
#include <linux/capability.h>
2223

2324
struct bpf_verifier_env;
2425
struct bpf_verifier_log;
@@ -119,7 +120,7 @@ struct bpf_map {
119120
struct bpf_map_memory memory;
120121
char name[BPF_OBJ_NAME_LEN];
121122
u32 btf_vmlinux_value_type_id;
122-
bool unpriv_array;
123+
bool bypass_spec_v1;
123124
bool frozen; /* write-once; write-protected by freeze_mutex */
124125
/* 22 bytes hole */
125126

@@ -1095,6 +1096,21 @@ struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
10951096

10961097
extern int sysctl_unprivileged_bpf_disabled;
10971098

1099+
static inline bool bpf_allow_ptr_leaks(void)
1100+
{
1101+
return perfmon_capable();
1102+
}
1103+
1104+
static inline bool bpf_bypass_spec_v1(void)
1105+
{
1106+
return perfmon_capable();
1107+
}
1108+
1109+
static inline bool bpf_bypass_spec_v4(void)
1110+
{
1111+
return perfmon_capable();
1112+
}
1113+
10981114
int bpf_map_new_fd(struct bpf_map *map, int flags);
10991115
int bpf_prog_new_fd(struct bpf_prog *prog);
11001116

include/linux/bpf_verifier.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,9 @@ struct bpf_verifier_env {
375375
u32 used_map_cnt; /* number of used maps */
376376
u32 id_gen; /* used to generate unique reg IDs */
377377
bool allow_ptr_leaks;
378+
bool bpf_capable;
379+
bool bypass_spec_v1;
380+
bool bypass_spec_v4;
378381
bool seen_direct_write;
379382
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
380383
const struct bpf_line_info *prev_linfo;

kernel/bpf/arraymap.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
7777
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
7878
int ret, numa_node = bpf_map_attr_numa_node(attr);
7979
u32 elem_size, index_mask, max_entries;
80-
bool unpriv = !capable(CAP_SYS_ADMIN);
80+
bool bypass_spec_v1 = bpf_bypass_spec_v1();
8181
u64 cost, array_size, mask64;
8282
struct bpf_map_memory mem;
8383
struct bpf_array *array;
@@ -95,7 +95,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
9595
mask64 -= 1;
9696

9797
index_mask = mask64;
98-
if (unpriv) {
98+
if (!bypass_spec_v1) {
9999
/* round up array size to nearest power of 2,
100100
* since cpu will speculate within index_mask limits
101101
*/
@@ -149,7 +149,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
149149
return ERR_PTR(-ENOMEM);
150150
}
151151
array->index_mask = index_mask;
152-
array->map.unpriv_array = unpriv;
152+
array->map.bypass_spec_v1 = bypass_spec_v1;
153153

154154
/* copy mandatory map attributes */
155155
bpf_map_init_from_attr(&array->map, attr);
@@ -219,7 +219,7 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
219219

220220
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
221221
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
222-
if (map->unpriv_array) {
222+
if (!map->bypass_spec_v1) {
223223
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
224224
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
225225
} else {
@@ -1053,7 +1053,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
10531053

10541054
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
10551055
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
1056-
if (map->unpriv_array) {
1056+
if (!map->bypass_spec_v1) {
10571057
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
10581058
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
10591059
} else {

kernel/bpf/bpf_struct_ops.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
557557
struct bpf_map *map;
558558
int err;
559559

560-
if (!capable(CAP_SYS_ADMIN))
560+
if (!bpf_capable())
561561
return ERR_PTR(-EPERM);
562562

563563
st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);

kernel/bpf/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
646646
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
647647
{
648648
if (!bpf_prog_kallsyms_candidate(fp) ||
649-
!capable(CAP_SYS_ADMIN))
649+
!bpf_capable())
650650
return;
651651

652652
bpf_prog_ksym_set_addr(fp);

kernel/bpf/cpumap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
8585
u64 cost;
8686
int ret;
8787

88-
if (!capable(CAP_SYS_ADMIN))
88+
if (!bpf_capable())
8989
return ERR_PTR(-EPERM);
9090

9191
/* check sanity of attributes */

kernel/bpf/hashtab.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,9 +359,9 @@ static int htab_map_alloc_check(union bpf_attr *attr)
359359
BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
360360
offsetof(struct htab_elem, hash_node.pprev));
361361

362-
if (lru && !capable(CAP_SYS_ADMIN))
362+
if (lru && !bpf_capable())
363363
/* LRU implementation is much complicated than other
364-
* maps. Hence, limit to CAP_SYS_ADMIN for now.
364+
* maps. Hence, limit to CAP_BPF.
365365
*/
366366
return -EPERM;
367367

kernel/bpf/helpers.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
633633
break;
634634
}
635635

636-
if (!capable(CAP_SYS_ADMIN))
636+
if (!bpf_capable())
637637
return NULL;
638638

639639
switch (func_id) {
@@ -642,6 +642,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
642642
case BPF_FUNC_spin_unlock:
643643
return &bpf_spin_unlock_proto;
644644
case BPF_FUNC_trace_printk:
645+
if (!perfmon_capable())
646+
return NULL;
645647
return bpf_get_trace_printk_proto();
646648
case BPF_FUNC_jiffies64:
647649
return &bpf_jiffies64_proto;

kernel/bpf/lpm_trie.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
543543
u64 cost = sizeof(*trie), cost_per_node;
544544
int ret;
545545

546-
if (!capable(CAP_SYS_ADMIN))
546+
if (!bpf_capable())
547547
return ERR_PTR(-EPERM);
548548

549549
/* check sanity of attributes */

kernel/bpf/map_in_map.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
6060
/* Misc members not needed in bpf_map_meta_equal() check. */
6161
inner_map_meta->ops = inner_map->ops;
6262
if (inner_map->ops == &array_map_ops) {
63-
inner_map_meta->unpriv_array = inner_map->unpriv_array;
63+
inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
6464
container_of(inner_map_meta, struct bpf_array, map)->index_mask =
6565
container_of(inner_map, struct bpf_array, map)->index_mask;
6666
}

kernel/bpf/queue_stack_maps.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
4545
/* Called from syscall */
4646
static int queue_stack_map_alloc_check(union bpf_attr *attr)
4747
{
48-
if (!capable(CAP_SYS_ADMIN))
48+
if (!bpf_capable())
4949
return -EPERM;
5050

5151
/* check sanity of attributes */

kernel/bpf/reuseport_array.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
154154
struct bpf_map_memory mem;
155155
u64 array_size;
156156

157-
if (!capable(CAP_SYS_ADMIN))
157+
if (!bpf_capable())
158158
return ERR_PTR(-EPERM);
159159

160160
array_size = sizeof(*array);

kernel/bpf/stackmap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
9393
u64 cost, n_buckets;
9494
int err;
9595

96-
if (!capable(CAP_SYS_ADMIN))
96+
if (!bpf_capable())
9797
return ERR_PTR(-EPERM);
9898

9999
if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)

kernel/bpf/syscall.c

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,7 +1534,7 @@ static int map_freeze(const union bpf_attr *attr)
15341534
err = -EBUSY;
15351535
goto err_put;
15361536
}
1537-
if (!capable(CAP_SYS_ADMIN)) {
1537+
if (!bpf_capable()) {
15381538
err = -EPERM;
15391539
goto err_put;
15401540
}
@@ -2009,6 +2009,55 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
20092009
}
20102010
}
20112011

2012+
static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
2013+
{
2014+
switch (prog_type) {
2015+
case BPF_PROG_TYPE_SCHED_CLS:
2016+
case BPF_PROG_TYPE_SCHED_ACT:
2017+
case BPF_PROG_TYPE_XDP:
2018+
case BPF_PROG_TYPE_LWT_IN:
2019+
case BPF_PROG_TYPE_LWT_OUT:
2020+
case BPF_PROG_TYPE_LWT_XMIT:
2021+
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2022+
case BPF_PROG_TYPE_SK_SKB:
2023+
case BPF_PROG_TYPE_SK_MSG:
2024+
case BPF_PROG_TYPE_LIRC_MODE2:
2025+
case BPF_PROG_TYPE_FLOW_DISSECTOR:
2026+
case BPF_PROG_TYPE_CGROUP_DEVICE:
2027+
case BPF_PROG_TYPE_CGROUP_SOCK:
2028+
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2029+
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2030+
case BPF_PROG_TYPE_CGROUP_SYSCTL:
2031+
case BPF_PROG_TYPE_SOCK_OPS:
2032+
case BPF_PROG_TYPE_EXT: /* extends any prog */
2033+
return true;
2034+
case BPF_PROG_TYPE_CGROUP_SKB:
2035+
/* always unpriv */
2036+
case BPF_PROG_TYPE_SK_REUSEPORT:
2037+
/* equivalent to SOCKET_FILTER. need CAP_BPF only */
2038+
default:
2039+
return false;
2040+
}
2041+
}
2042+
2043+
static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
2044+
{
2045+
switch (prog_type) {
2046+
case BPF_PROG_TYPE_KPROBE:
2047+
case BPF_PROG_TYPE_TRACEPOINT:
2048+
case BPF_PROG_TYPE_PERF_EVENT:
2049+
case BPF_PROG_TYPE_RAW_TRACEPOINT:
2050+
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2051+
case BPF_PROG_TYPE_TRACING:
2052+
case BPF_PROG_TYPE_LSM:
2053+
case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
2054+
case BPF_PROG_TYPE_EXT: /* extends any prog */
2055+
return true;
2056+
default:
2057+
return false;
2058+
}
2059+
}
2060+
20122061
/* last field in 'union bpf_attr' used by this command */
20132062
#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
20142063

@@ -2031,7 +2080,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
20312080

20322081
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
20332082
(attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
2034-
!capable(CAP_SYS_ADMIN))
2083+
!bpf_capable())
20352084
return -EPERM;
20362085

20372086
/* copy eBPF program license from user space */
@@ -2044,11 +2093,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
20442093
is_gpl = license_is_gpl_compatible(license);
20452094

20462095
if (attr->insn_cnt == 0 ||
2047-
attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
2096+
attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
20482097
return -E2BIG;
20492098
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
20502099
type != BPF_PROG_TYPE_CGROUP_SKB &&
2051-
!capable(CAP_SYS_ADMIN))
2100+
!bpf_capable())
2101+
return -EPERM;
2102+
2103+
if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN))
2104+
return -EPERM;
2105+
if (is_perfmon_prog_type(type) && !perfmon_capable())
20522106
return -EPERM;
20532107

20542108
bpf_prog_load_fixup_attach_type(attr);
@@ -2682,6 +2736,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
26822736
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
26832737
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
26842738
case BPF_PROG_TYPE_CGROUP_SKB:
2739+
if (!capable(CAP_NET_ADMIN))
2740+
/* cg-skb progs can be loaded by unpriv user.
2741+
* check permissions at attach time.
2742+
*/
2743+
return -EPERM;
26852744
return prog->enforce_expected_attach_type &&
26862745
prog->expected_attach_type != attach_type ?
26872746
-EINVAL : 0;
@@ -2747,9 +2806,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
27472806
struct bpf_prog *prog;
27482807
int ret;
27492808

2750-
if (!capable(CAP_NET_ADMIN))
2751-
return -EPERM;
2752-
27532809
if (CHECK_ATTR(BPF_PROG_ATTACH))
27542810
return -EINVAL;
27552811

@@ -2804,9 +2860,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
28042860
{
28052861
enum bpf_prog_type ptype;
28062862

2807-
if (!capable(CAP_NET_ADMIN))
2808-
return -EPERM;
2809-
28102863
if (CHECK_ATTR(BPF_PROG_DETACH))
28112864
return -EINVAL;
28122865

@@ -2819,6 +2872,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
28192872
case BPF_PROG_TYPE_LIRC_MODE2:
28202873
return lirc_prog_detach(attr);
28212874
case BPF_PROG_TYPE_FLOW_DISSECTOR:
2875+
if (!capable(CAP_NET_ADMIN))
2876+
return -EPERM;
28222877
return skb_flow_dissector_bpf_prog_detach(attr);
28232878
case BPF_PROG_TYPE_CGROUP_DEVICE:
28242879
case BPF_PROG_TYPE_CGROUP_SKB:
@@ -2882,8 +2937,6 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
28822937
struct bpf_prog *prog;
28832938
int ret = -ENOTSUPP;
28842939

2885-
if (!capable(CAP_SYS_ADMIN))
2886-
return -EPERM;
28872940
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
28882941
return -EINVAL;
28892942

@@ -3184,7 +3237,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
31843237
info.run_time_ns = stats.nsecs;
31853238
info.run_cnt = stats.cnt;
31863239

3187-
if (!capable(CAP_SYS_ADMIN)) {
3240+
if (!bpf_capable()) {
31883241
info.jited_prog_len = 0;
31893242
info.xlated_prog_len = 0;
31903243
info.nr_jited_ksyms = 0;
@@ -3543,7 +3596,7 @@ static int bpf_btf_load(const union bpf_attr *attr)
35433596
if (CHECK_ATTR(BPF_BTF_LOAD))
35443597
return -EINVAL;
35453598

3546-
if (!capable(CAP_SYS_ADMIN))
3599+
if (!bpf_capable())
35473600
return -EPERM;
35483601

35493602
return btf_new_fd(attr);
@@ -3766,9 +3819,6 @@ static int link_create(union bpf_attr *attr)
37663819
struct bpf_prog *prog;
37673820
int ret;
37683821

3769-
if (!capable(CAP_NET_ADMIN))
3770-
return -EPERM;
3771-
37723822
if (CHECK_ATTR(BPF_LINK_CREATE))
37733823
return -EINVAL;
37743824

@@ -3817,9 +3867,6 @@ static int link_update(union bpf_attr *attr)
38173867
u32 flags;
38183868
int ret;
38193869

3820-
if (!capable(CAP_NET_ADMIN))
3821-
return -EPERM;
3822-
38233870
if (CHECK_ATTR(BPF_LINK_UPDATE))
38243871
return -EINVAL;
38253872

@@ -3988,7 +4035,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
39884035
union bpf_attr attr;
39894036
int err;
39904037

3991-
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
4038+
if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
39924039
return -EPERM;
39934040

39944041
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);

0 commit comments

Comments (0)