
Commit 85782e0

borkmann authored and Alexei Starovoitov committed
bpf: undo prog rejection on read-only lock failure
Partially undo commit 9facc33 ("bpf: reject any prog that failed read-only lock") since it caused a regression: syzkaller managed to trigger a panic via fault injection deep in the set_memory_ro() path by letting an allocation fail. In x86's __change_page_attr_set_clr() it was able to change the attributes of the primary mapping but not of the alias mapping via cpa_process_alias(), so the second, inner call to __change_page_attr() had to split a larger page and failed in alloc_pages() with the artificially triggered allocation error, which was then propagated down to the call site.

Thus, set_memory_ro() returned with an error, but debugging with probe_kernel_write() revealed EFAULT on that memory, since the change of the primary mapping had succeeded. The subsequent hdr->locked = 0 reset then triggered the panic because it was performed on read-only memory. The call-site assumption that set_memory_ro() would either fully succeed or fully fail was in fact wrong: there is no rollback in the set_memory_*() calls after a partial change of mappings, in other words, we are left in a "half done" state. A later undo via set_memory_rw() does succeed, though, due to matching permissions on that part (that is, try_preserve_large_page() succeeding). While reproducing this locally with the error triggered explicitly, the initial splitting only happens on rare occasions, and in the real world it would additionally require OOM conditions; but that said, it can partially fail. It is therefore definitely wrong to bail out on a set_memory_ro() error and reject the whole program given the set_memory_*() semantics we have today.

We should not have gone the extra mile here, since no other in-tree user today checks for set_memory_*() errors, e.g. neither module_enable_ro() / module_disable_ro() for module RO/NX handling, which is mostly default these days, nor the kprobes core with alloc_insn_page() / free_insn_page(), both of which can be invoked long after bootup. The original commit 314beb9 ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") did not check either when it first introduced this to BPF, so "improving" on it by bailing out was clearly not right as long as set_memory_*() cannot handle such failures today.

Kees suggested that if set_memory_*() can fail, it should be annotated with __must_check, and all callers then need to deal with failure gracefully, given that those set_memory_*() markings are not "advisory" but are expected to actually do what they say. This might be an option worth pursuing in the future, but it would also require that set_memory_*() calls on supporting archs are guaranteed to be "atomic" in the sense that they roll back when part of the range fails. Once that holds, the RW -> RO transition could be made more robust this way, while the subsequent RO -> RW transition /must/ continue to guarantee that the undo always succeeds.

Reported-by: [email protected]
Reported-by: [email protected]
Fixes: 9facc33 ("bpf: reject any prog that failed read-only lock")
Cc: Laura Abbott <[email protected]>
Cc: Kees Cook <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
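As a side note on Kees' suggestion above, a minimal sketch of what such an annotation could look like is given below. The set_memory_*() prototypes mirror the ones exposed on CONFIG_ARCH_HAS_SET_MEMORY architectures; the __must_check variant itself is hypothetical and not part of this commit:

/*
 * Current interface on CONFIG_ARCH_HAS_SET_MEMORY architectures: the
 * return value exists, but in-tree callers (module RO/NX handling,
 * kprobes insn pages, BPF) do not act on it.
 *
 *	int set_memory_ro(unsigned long addr, int numpages);
 *	int set_memory_rw(unsigned long addr, int numpages);
 *
 * Hypothetical future direction per Kees' suggestion: force callers to
 * handle failure. Only sensible once partial failures are rolled back
 * inside set_memory_*() so that no "half done" mapping state can leak
 * out to callers.
 */
int __must_check set_memory_ro(unsigned long addr, int numpages);
int __must_check set_memory_rw(unsigned long addr, int numpages);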
1 parent f605ce5 commit 85782e0

File tree

2 files changed: 9 additions, 77 deletions

include/linux/filter.h

Lines changed: 8 additions & 48 deletions
@@ -470,9 +470,7 @@ struct sock_fprog_kern {
 };
 
 struct bpf_binary_header {
-	u16 pages;
-	u16 locked:1;
-
+	u32 pages;
 	/* Some arches need word alignment for their instructions */
 	u8 image[] __aligned(4);
 };
@@ -481,7 +479,7 @@ struct bpf_prog {
 	u16 pages;		/* Number of allocated pages */
 	u16 jited:1,		/* Is our filter JIT'ed? */
 	    jit_requested:1,	/* archs need to JIT the prog */
-	    locked:1,		/* Program image locked? */
+	    undo_set_mem:1,	/* Passed set_memory_ro() checkpoint */
 	    gpl_compatible:1,	/* Is filter GPL compatible? */
 	    cb_access:1,	/* Is control block accessed? */
 	    dst_needed:1,	/* Do we need dst entry? */
@@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	fp->locked = 1;
-	if (set_memory_ro((unsigned long)fp, fp->pages))
-		fp->locked = 0;
-#endif
+	fp->undo_set_mem = 1;
+	set_memory_ro((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (fp->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		fp->locked = 0;
-	}
-#endif
+	if (fp->undo_set_mem)
+		set_memory_rw((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	hdr->locked = 1;
-	if (set_memory_ro((unsigned long)hdr, hdr->pages))
-		hdr->locked = 0;
-#endif
+	set_memory_ro((unsigned long)hdr, hdr->pages);
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (hdr->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		hdr->locked = 0;
-	}
-#endif
+	set_memory_rw((unsigned long)hdr, hdr->pages);
 }
 
 static inline struct bpf_binary_header *
@@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
-{
-	if (!fp->locked)
-		return -ENOLCK;
-	if (fp->jited) {
-		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
-
-		if (!hdr->locked)
-			return -ENOLCK;
-	}
-
-	return 0;
-}
-#endif
-
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {

kernel/bpf/core.c

Lines changed: 1 addition & 29 deletions
@@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = size / PAGE_SIZE;
-	hdr->locked = 0;
-
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 	return 0;
 }
 
-static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
-{
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	int i, err;
-
-	for (i = 0; i < fp->aux->func_cnt; i++) {
-		err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
-		if (err)
-			return err;
-	}
-
-	return bpf_prog_check_pages_ro_single(fp);
-#endif
-	return 0;
-}
-
 static void bpf_prog_select_func(struct bpf_prog *fp)
 {
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -1524,17 +1506,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 * all eBPF JITs might immediately support all features.
 	 */
 	*err = bpf_check_tail_call(fp);
-	if (*err)
-		return fp;
-
-	/* Checkpoint: at this point onwards any cBPF -> eBPF or
-	 * native eBPF program is read-only. If we failed to change
-	 * the page attributes (e.g. allocation failure from
-	 * splitting large pages), then reject the whole program
-	 * in order to guarantee not ending up with any W+X pages
-	 * from BPF side in kernel.
-	 */
-	*err = bpf_prog_check_pages_ro_locked(fp);
+
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
