Skip to content

Commit 9facc33

Browse files
borkmann authored and Alexei Starovoitov committed
bpf: reject any prog that failed read-only lock
We currently lock any JITed image as read-only via bpf_jit_binary_lock_ro() as well as the BPF image as read-only through bpf_prog_lock_ro(). In the case any of these would fail we throw a WARN_ON_ONCE() in order to yell loudly to the log. Perhaps, to some extent, this may be comparable to an allocation where __GFP_NOWARN is explicitly not set. Added via 65869a4 ("bpf: improve read-only handling"), this behavior is slightly different compared to any of the other in-kernel set_memory_ro() users who do not check the return code of set_memory_ro() and friends /at all/ (e.g. in the case of module_enable_ro() / module_disable_ro()). Given that in BPF this is a mandatory hardening step, we want to know whether there are any issues that would leave both BPF data writable. So it happens that syzkaller enabled fault injection and it triggered memory allocation failure deep inside x86's change_page_attr_set_clr() which was triggered from set_memory_ro(). Now, there are two options: i) leaving everything as is, and ii) reworking the image locking code in order to have a final checkpoint out of the central bpf_prog_select_runtime() which probes whether any of the calls during prog setup weren't successful, and then bailing out with an error. Option ii) is a better approach since this additional paranoia avoids altogether leaving any potential W+X pages from BPF side in the system. Therefore, let's be strict about it, and reject programs in such unlikely occasion. While testing I noticed also that one bpf_prog_lock_ro() call was missing on the outer dummy prog in case of calls, e.g. in the destructor we call bpf_prog_free_deferred() on the main prog where we try to bpf_prog_unlock_free() the program, and since we go via bpf_prog_select_runtime() do that as well. Reported-by: [email protected] Reported-by: [email protected] Signed-off-by: Daniel Borkmann <[email protected]> Acked-by: Martin KaFai Lau <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 7d1982b commit 9facc33

File tree

3 files changed

+87
-32
lines changed

3 files changed

+87
-32
lines changed

include/linux/filter.h

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,8 @@ struct sock_fprog_kern {
469469
};
470470

471471
struct bpf_binary_header {
472-
unsigned int pages;
472+
u16 pages;
473+
u16 locked:1;
473474
u8 image[];
474475
};
475476

@@ -671,50 +672,49 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
671672

672673
#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
673674

674-
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
675675
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
676676
{
677+
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
677678
fp->locked = 1;
678-
WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
679+
if (set_memory_ro((unsigned long)fp, fp->pages))
680+
fp->locked = 0;
681+
#endif
679682
}
680683

681684
static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
682685
{
686+
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
683687
if (fp->locked) {
684688
WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
685689
/* In case set_memory_rw() fails, we want to be the first
686690
* to crash here instead of some random place later on.
687691
*/
688692
fp->locked = 0;
689693
}
694+
#endif
690695
}
691696

692697
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
693698
{
694-
WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
695-
}
696-
697-
static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
698-
{
699-
WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
700-
}
701-
#else
702-
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
703-
{
704-
}
705-
706-
static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
707-
{
708-
}
709-
710-
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
711-
{
699+
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
700+
hdr->locked = 1;
701+
if (set_memory_ro((unsigned long)hdr, hdr->pages))
702+
hdr->locked = 0;
703+
#endif
712704
}
713705

714706
static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
715707
{
708+
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
709+
if (hdr->locked) {
710+
WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
711+
/* In case set_memory_rw() fails, we want to be the first
712+
* to crash here instead of some random place later on.
713+
*/
714+
hdr->locked = 0;
715+
}
716+
#endif
716717
}
717-
#endif /* CONFIG_ARCH_HAS_SET_MEMORY */
718718

719719
static inline struct bpf_binary_header *
720720
bpf_jit_binary_hdr(const struct bpf_prog *fp)
@@ -725,6 +725,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
725725
return (void *)addr;
726726
}
727727

728+
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
729+
static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
730+
{
731+
if (!fp->locked)
732+
return -ENOLCK;
733+
if (fp->jited) {
734+
const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
735+
736+
if (!hdr->locked)
737+
return -ENOLCK;
738+
}
739+
740+
return 0;
741+
}
742+
#endif
743+
728744
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
729745
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
730746
{

kernel/bpf/core.c

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
598598
bpf_fill_ill_insns(hdr, size);
599599

600600
hdr->pages = size / PAGE_SIZE;
601+
hdr->locked = 0;
602+
601603
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
602604
PAGE_SIZE - sizeof(*hdr));
603605
start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1448,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
14481450
return 0;
14491451
}
14501452

1453+
static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
1454+
{
1455+
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
1456+
int i, err;
1457+
1458+
for (i = 0; i < fp->aux->func_cnt; i++) {
1459+
err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
1460+
if (err)
1461+
return err;
1462+
}
1463+
1464+
return bpf_prog_check_pages_ro_single(fp);
1465+
#endif
1466+
return 0;
1467+
}
1468+
1469+
static void bpf_prog_select_func(struct bpf_prog *fp)
1470+
{
1471+
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1472+
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
1473+
1474+
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
1475+
#else
1476+
fp->bpf_func = __bpf_prog_ret0_warn;
1477+
#endif
1478+
}
1479+
14511480
/**
14521481
* bpf_prog_select_runtime - select exec runtime for BPF program
14531482
* @fp: bpf_prog populated with internal BPF program
@@ -1458,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
14581487
*/
14591488
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
14601489
{
1461-
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1462-
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
1490+
/* In case of BPF to BPF calls, verifier did all the prep
1491+
* work with regards to JITing, etc.
1492+
*/
1493+
if (fp->bpf_func)
1494+
goto finalize;
14631495

1464-
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
1465-
#else
1466-
fp->bpf_func = __bpf_prog_ret0_warn;
1467-
#endif
1496+
bpf_prog_select_func(fp);
14681497

14691498
/* eBPF JITs can rewrite the program in case constant
14701499
* blinding is active. However, in case of error during
@@ -1485,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
14851514
if (*err)
14861515
return fp;
14871516
}
1517+
1518+
finalize:
14881519
bpf_prog_lock_ro(fp);
14891520

14901521
/* The tail call compatibility check can only be done at
@@ -1493,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
14931524
* all eBPF JITs might immediately support all features.
14941525
*/
14951526
*err = bpf_check_tail_call(fp);
1496-
1527+
if (*err)
1528+
return fp;
1529+
1530+
/* Checkpoint: at this point onwards any cBPF -> eBPF or
1531+
* native eBPF program is read-only. If we failed to change
1532+
* the page attributes (e.g. allocation failure from
1533+
* splitting large pages), then reject the whole program
1534+
* in order to guarantee not ending up with any W+X pages
1535+
* from BPF side in kernel.
1536+
*/
1537+
*err = bpf_prog_check_pages_ro_locked(fp);
14971538
return fp;
14981539
}
14991540
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

kernel/bpf/syscall.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr)
13531353
if (err < 0)
13541354
goto free_used_maps;
13551355

1356-
/* eBPF program is ready to be JITed */
1357-
if (!prog->bpf_func)
1358-
prog = bpf_prog_select_runtime(prog, &err);
1356+
prog = bpf_prog_select_runtime(prog, &err);
13591357
if (err < 0)
13601358
goto free_used_maps;
13611359

0 commit comments

Comments
 (0)