
Commit 661e4e3

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2018-01-09

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Prevent out-of-bounds speculation in BPF maps by masking the index after
   bounds checks in order to fix spectre v1, and add an option BPF_JIT_ALWAYS_ON
   into Kconfig that allows for removing the BPF interpreter from the kernel in
   favor of JIT-only mode to make spectre v2 harder, from Alexei.

2) Remove false sharing of map refcount with max_entries which was used in
   spectre v1, from Daniel.

3) Add a missing NULL psock check in sockmap in order to fix a race, from John.

4) Fix test_align BPF selftest case since a recent change in verifier rejects
   the bit-wise arithmetic on pointers earlier but test_align update was
   missing, from Alexei.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 4512c43 + 290af86 commit 661e4e3
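
As a rough illustration of the index-masking idea in change 1) above, here is a
minimal userspace C sketch (not the kernel code; MAX_ENTRIES, INDEX_MASK and
lookup() are made up for the example). The architectural bounds check is kept,
and the index is additionally AND-ed with roundup_pow_of_two(max_entries) - 1,
so even a speculatively executed load stays inside the rounded-up backing array:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_ENTRIES 100u
    /* roundup_pow_of_two(100) - 1 == 128 - 1 */
    #define INDEX_MASK  127u

    static uint64_t table[INDEX_MASK + 1];  /* storage rounded up to 128 slots */

    static uint64_t lookup(uint32_t index)
    {
            if (index >= MAX_ENTRIES)       /* architectural bounds check */
                    return 0;

            /* Even if the CPU speculates past the branch above, the masked
             * index cannot address memory beyond table[], so no
             * attacker-controlled out-of-bounds load is issued.
             */
            return table[index & INDEX_MASK];
    }

    int main(void)
    {
            printf("%llu\n", (unsigned long long)lookup(42));
            return 0;
    }

This mirrors what the arraymap.c and verifier.c hunks below do for unprivileged
array maps and tail calls, respectively.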

File tree

11 files changed: +150, -50 lines

include/linux/bpf.h

Lines changed: 18 additions & 8 deletions

@@ -43,7 +43,14 @@ struct bpf_map_ops {
 };
 
 struct bpf_map {
-        atomic_t refcnt;
+        /* 1st cacheline with read-mostly members of which some
+         * are also accessed in fast-path (e.g. ops, max_entries).
+         */
+        const struct bpf_map_ops *ops ____cacheline_aligned;
+        struct bpf_map *inner_map_meta;
+#ifdef CONFIG_SECURITY
+        void *security;
+#endif
         enum bpf_map_type map_type;
         u32 key_size;
         u32 value_size;
@@ -52,15 +59,17 @@ struct bpf_map {
         u32 pages;
         u32 id;
         int numa_node;
-        struct user_struct *user;
-        const struct bpf_map_ops *ops;
-        struct work_struct work;
+        bool unpriv_array;
+        /* 7 bytes hole */
+
+        /* 2nd cacheline with misc members to avoid false sharing
+         * particularly with refcounting.
+         */
+        struct user_struct *user ____cacheline_aligned;
+        atomic_t refcnt;
         atomic_t usercnt;
-        struct bpf_map *inner_map_meta;
+        struct work_struct work;
         char name[BPF_OBJ_NAME_LEN];
-#ifdef CONFIG_SECURITY
-        void *security;
-#endif
 };
 
 /* function argument constraints */
@@ -221,6 +230,7 @@ struct bpf_prog_aux {
 struct bpf_array {
         struct bpf_map map;
         u32 elem_size;
+        u32 index_mask;
         /* 'ownership' of prog_array is claimed by the first program that
          * is going to use this map or by the first program which FD is stored
          * in the map to make sure that all callers and callees have the same
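
The struct reshuffle above groups the read-mostly, fast-path members (ops,
max_entries, and so on) on the first cache line and moves the frequently
written refcnt/usercnt onto a second one, so refcount updates no longer bounce
the line that lookups read. A minimal userspace analogue of the same layout
idea (the 64-byte line size and all names here are assumptions for
illustration; the kernel uses ____cacheline_aligned):

    #include <stdatomic.h>
    #include <stdint.h>

    #define CACHELINE 64   /* assumed cache-line size */

    struct hot_map {
            /* 1st cacheline: read-mostly members touched on every lookup */
            _Alignas(CACHELINE) const void *ops;
            uint32_t max_entries;
            uint32_t index_mask;

            /* 2nd cacheline: frequently written members, kept apart so the
             * atomic refcount traffic does not invalidate the line above
             */
            _Alignas(CACHELINE) atomic_int refcnt;
            atomic_int usercnt;
    };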

init/Kconfig

Lines changed: 7 additions & 0 deletions

@@ -1396,6 +1396,13 @@ config BPF_SYSCALL
           Enable the bpf() system call that allows to manipulate eBPF
           programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+        bool "Permanently enable BPF JIT and remove BPF interpreter"
+        depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+        help
+          Enables BPF JIT and removes BPF interpreter to avoid
+          speculative execution of BPF instructions by the interpreter
+
 config USERFAULTFD
         bool "Enable userfaultfd() system call"
         select ANON_INODES

kernel/bpf/arraymap.c

Lines changed: 36 additions & 11 deletions

@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
         bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
         int numa_node = bpf_map_attr_numa_node(attr);
+        u32 elem_size, index_mask, max_entries;
+        bool unpriv = !capable(CAP_SYS_ADMIN);
         struct bpf_array *array;
         u64 array_size;
-        u32 elem_size;
 
         /* check sanity of attributes */
         if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
         elem_size = round_up(attr->value_size, 8);
 
+        max_entries = attr->max_entries;
+        index_mask = roundup_pow_of_two(max_entries) - 1;
+
+        if (unpriv)
+                /* round up array size to nearest power of 2,
+                 * since cpu will speculate within index_mask limits
+                 */
+                max_entries = index_mask + 1;
+
         array_size = sizeof(*array);
         if (percpu)
-                array_size += (u64) attr->max_entries * sizeof(void *);
+                array_size += (u64) max_entries * sizeof(void *);
         else
-                array_size += (u64) attr->max_entries * elem_size;
+                array_size += (u64) max_entries * elem_size;
 
         /* make sure there is no u32 overflow later in round_up() */
         if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
         array = bpf_map_area_alloc(array_size, numa_node);
         if (!array)
                 return ERR_PTR(-ENOMEM);
+        array->index_mask = index_mask;
+        array->map.unpriv_array = unpriv;
 
         /* copy mandatory map attributes */
         array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
         if (unlikely(index >= array->map.max_entries))
                 return NULL;
 
-        return array->value + array->elem_size * index;
+        return array->value + array->elem_size * (index & array->index_mask);
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
+        struct bpf_array *array = container_of(map, struct bpf_array, map);
         struct bpf_insn *insn = insn_buf;
         u32 elem_size = round_up(map->value_size, 8);
         const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
         *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
         *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-        *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+        if (map->unpriv_array) {
+                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+                *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+        } else {
+                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+        }
 
         if (is_power_of_2(elem_size)) {
                 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
         if (unlikely(index >= array->map.max_entries))
                 return NULL;
 
-        return this_cpu_ptr(array->pptrs[index]);
+        return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
          */
         size = round_up(map->value_size, 8);
         rcu_read_lock();
-        pptr = array->pptrs[index];
+        pptr = array->pptrs[index & array->index_mask];
         for_each_possible_cpu(cpu) {
                 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
                 off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
                 return -EEXIST;
 
         if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
-                memcpy(this_cpu_ptr(array->pptrs[index]),
+                memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
                        value, map->value_size);
         else
-                memcpy(array->value + array->elem_size * index,
+                memcpy(array->value +
+                       array->elem_size * (index & array->index_mask),
                        value, map->value_size);
         return 0;
 }
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
          */
         size = round_up(map->value_size, 8);
         rcu_read_lock();
-        pptr = array->pptrs[index];
+        pptr = array->pptrs[index & array->index_mask];
         for_each_possible_cpu(cpu) {
                 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
                 off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 static u32 array_of_map_gen_lookup(struct bpf_map *map,
                                    struct bpf_insn *insn_buf)
 {
+        struct bpf_array *array = container_of(map, struct bpf_array, map);
         u32 elem_size = round_up(map->value_size, 8);
         struct bpf_insn *insn = insn_buf;
         const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
 
         *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
         *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-        *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+        if (map->unpriv_array) {
+                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
+                *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+        } else {
+                *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+        }
         if (is_power_of_2(elem_size))
                 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
         else

kernel/bpf/core.c

Lines changed: 19 additions & 0 deletions

@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *      __bpf_prog_run - run eBPF program on a given context
  *      @ctx: is the data we are operating on
@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 
+#else
+static unsigned int __bpf_prog_ret0(const void *ctx,
+                                    const struct bpf_insn *insn)
+{
+        return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
                                const struct bpf_prog *fp)
 {
@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
         u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
 
         fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+        fp->bpf_func = __bpf_prog_ret0;
+#endif
 
         /* eBPF JITs can rewrite the program in case constant
          * blinding is active. However, in case of error during
@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
          */
         if (!bpf_prog_is_dev_bound(fp->aux)) {
                 fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+                if (!fp->jited) {
+                        *err = -ENOTSUPP;
+                        return fp;
+                }
+#endif
         } else {
                 *err = bpf_prog_offload_compile(fp);
                 if (*err)

kernel/bpf/sockmap.c

Lines changed: 9 additions & 2 deletions

@@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)
 
                 write_lock_bh(&sock->sk_callback_lock);
                 psock = smap_psock_sk(sock);
-                smap_list_remove(psock, &stab->sock_map[i]);
-                smap_release_sock(psock, sock);
+                /* This check handles a racing sock event that can get the
+                 * sk_callback_lock before this case but after xchg happens
+                 * causing the refcnt to hit zero and sock user data (psock)
+                 * to be null and queued for garbage collection.
+                 */
+                if (likely(psock)) {
+                        smap_list_remove(psock, &stab->sock_map[i]);
+                        smap_release_sock(psock, sock);
+                }
                 write_unlock_bh(&sock->sk_callback_lock);
         }
         rcu_read_unlock();

kernel/bpf/verifier.c

Lines changed: 36 additions & 0 deletions

@@ -1729,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
         err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
         if (err)
                 return err;
+        if (func_id == BPF_FUNC_tail_call) {
+                if (meta.map_ptr == NULL) {
+                        verbose(env, "verifier bug\n");
+                        return -EINVAL;
+                }
+                env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
+        }
         err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
         if (err)
                 return err;
@@ -4456,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                          */
                         insn->imm = 0;
                         insn->code = BPF_JMP | BPF_TAIL_CALL;
+
+                        /* instead of changing every JIT dealing with tail_call
+                         * emit two extra insns:
+                         * if (index >= max_entries) goto out;
+                         * index &= array->index_mask;
+                         * to avoid out-of-bounds cpu speculation
+                         */
+                        map_ptr = env->insn_aux_data[i + delta].map_ptr;
+                        if (map_ptr == BPF_MAP_PTR_POISON) {
+                                verbose(env, "tail_call abusing map_ptr\n");
+                                return -EINVAL;
+                        }
+                        if (!map_ptr->unpriv_array)
+                                continue;
+                        insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+                                                  map_ptr->max_entries, 2);
+                        insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+                                                    container_of(map_ptr,
+                                                                 struct bpf_array,
+                                                                 map)->index_mask);
+                        insn_buf[2] = *insn;
+                        cnt = 3;
+                        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+                        if (!new_prog)
+                                return -ENOMEM;
+
+                        delta    += cnt - 1;
+                        env->prog = prog = new_prog;
+                        insn      = new_prog->insnsi + i + delta;
                         continue;
                 }
 

lib/test_bpf.c

Lines changed: 7 additions & 4 deletions

@@ -6250,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
                                 return NULL;
                         }
                 }
-                /* We don't expect to fail. */
                 if (*err) {
-                        pr_cont("FAIL to attach err=%d len=%d\n",
+                        pr_cont("FAIL to prog_create err=%d len=%d\n",
                                 *err, fprog.len);
                         return NULL;
                 }
@@ -6276,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
                  * checks.
                  */
                 fp = bpf_prog_select_runtime(fp, err);
+                if (*err) {
+                        pr_cont("FAIL to select_runtime err=%d\n", *err);
+                        return NULL;
+                }
                 break;
         }
 
@@ -6461,8 +6464,8 @@ static __init int test_bpf(void)
                                 pass_cnt++;
                                 continue;
                         }
-
-                        return err;
+                        err_cnt++;
+                        continue;
                 }
 
                 pr_cont("jited:%u ", fp->jited);

net/core/filter.c

Lines changed: 2 additions & 4 deletions

@@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
                  */
                 goto out_err_free;
 
-        /* We are guaranteed to never error here with cBPF to eBPF
-         * transitions, since there's no issue with type compatibility
-         * checks on program arrays.
-         */
         fp = bpf_prog_select_runtime(fp, &err);
+        if (err)
+                goto out_err_free;
 
         kfree(old_prog);
         return fp;

net/core/sysctl_net_core.c

Lines changed: 6 additions & 0 deletions

@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
                 .data           = &bpf_jit_enable,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
                 .proc_handler   = proc_dointvec
+#else
+                .proc_handler   = proc_dointvec_minmax,
+                .extra1         = &one,
+                .extra2         = &one,
+#endif
         },
 # ifdef CONFIG_HAVE_EBPF_JIT
         {

net/socket.c

Lines changed: 9 additions & 0 deletions

@@ -2619,6 +2619,15 @@ static int __init sock_init(void)
 
 core_initcall(sock_init);       /* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+        bpf_jit_enable = 1;
+#endif
+        return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {