
Commit 74451e6

borkmann authored and davem330 committed
bpf: make jited programs visible in traces
A long-standing issue with JITed programs is that stack traces from function tracing check whether a given address is kernel code through {__,}kernel_text_address(), which checks for code in the core kernel, in modules and in dynamically allocated ftrace trampolines. What is still missing are BPF JITed programs (interpreted programs are not an issue, as __bpf_prog_run() will be attributed to them), so when a stack trace is triggered, the code walking the stack won't see any of the JITed images. The same applies to address correlation done from user space by reading /proc/kallsyms. This is read by tools like perf, and the latter is also useful for permanent live tracing with eBPF itself in combination with stack maps, when other eBPF types are part of the callchain. See the offwaketime example on dumping a stack from a map.

This work tackles that issue by making the addresses and symbols known to the kernel. The lookup from *kernel_text_address() is implemented through a latched RB tree that can be read under RCU in the fast path, and that is also shared for symbol/size/offset lookup of a specific given address in kallsyms. The slow-path iteration through all symbols in the seq file is done via an RCU list, which holds only a tiny fraction of all exported ksyms, usually below 0.1 percent. Function symbols are exported as bpf_prog_<tag> in order to aid debugging and attribution.

This facility is currently enabled root-only, when bpf_jit_kallsyms is set to 1, and is disabled if hardening is active in any mode. The rationale behind this is that a lot of systems still ship with world read permissions on kallsyms, so these addresses should not suddenly get exposed on them. If that situation improves in the future, we always have the option of changing the default. Likewise, unprivileged programs are not allowed to add entries either, but that is less of a concern, as most such program types relevant in this context are root-only anyway. If enabled, call graphs and stack traces then show a correct attribution; one example is illustrated below, where the trace is now visible to tooling such as perf script --kallsyms=/proc/kallsyms and friends.
Before:

  7fff8166889d bpf_clone_redirect+0x80007f0020ed (/lib/modules/4.9.0-rc8+/build/vmlinux)
         f5d80 __sendmsg_nocancel+0xffff006451f1a007 (/usr/lib64/libc-2.18.so)

After:

  7fff816688b7 bpf_clone_redirect+0x80007f002107 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fffa0575728 bpf_prog_33c45a467c9e061a+0x8000600020fb (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fffa07ef1fc cls_bpf_classify+0x8000600020dc (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff81678b68 tc_classify+0x80007f002078 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164d40b __netif_receive_skb_core+0x80007f0025fb (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164d718 __netif_receive_skb+0x80007f002018 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164e565 process_backlog+0x80007f002095 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164dc71 net_rx_action+0x80007f002231 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff81767461 __softirqentry_text_start+0x80007f0020d1 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff817658ac do_softirq_own_stack+0x80007f00201c (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff810a2c20 do_softirq+0x80007f002050 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff810a2cb5 __local_bh_enable_ip+0x80007f002085 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8168d452 ip_finish_output2+0x80007f002152 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8168ea3d ip_finish_output+0x80007f00217d (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8168f2af ip_output+0x80007f00203f (/lib/modules/4.9.0-rc8+/build/vmlinux)
  [...]
  7fff81005854 do_syscall_64+0x80007f002054 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff817649eb return_from_SYSCALL_64+0x80007f002000 (/lib/modules/4.9.0-rc8+/build/vmlinux)
         f5d80 __sendmsg_nocancel+0xffff01c484812007 (/usr/lib64/libc-2.18.so)

Signed-off-by: Daniel Borkmann <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Cc: [email protected]
Signed-off-by: David S. Miller <[email protected]>
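For context, the stack-walk side described above hooks into the kernel's text-address checks; kernel/extable.c is among the 13 files changed but is not shown in this excerpt. A minimal sketch of how is_bpf_text_address() plausibly slots into __kernel_text_address(), modeled on the commit message rather than the exact code:

/* Sketch only: the BPF check joins the existing core-kernel, module
 * and ftrace-trampoline checks. Under RCU, is_bpf_text_address()
 * walks the latched RB tree keyed by JIT image address ranges.
 */
int __kernel_text_address(unsigned long addr)
{
        if (core_kernel_text(addr))
                return 1;
        if (is_module_text_address(addr))
                return 1;
        if (is_ftrace_trampoline(addr))
                return 1;
        if (is_bpf_text_address(addr))
                return 1;
        /* ... remaining special cases elided ... */
        return 0;
}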
1 parent 9383191 commit 74451e6

File tree

13 files changed: +419 −63 lines changed

Documentation/sysctl/net.txt

Lines changed: 12 additions & 0 deletions
@@ -54,6 +54,18 @@ Values :
 1 - enable JIT hardening for unprivileged users only
 2 - enable JIT hardening for all users
 
+bpf_jit_kallsyms
+----------------
+
+When Berkeley Packet Filter Just in Time compiler is enabled, then compiled
+images are unknown addresses to the kernel, meaning they neither show up in
+traces nor in /proc/kallsyms. This enables export of these addresses, which
+can be used for debugging/tracing. If bpf_jit_harden is enabled, this feature
+is disabled.
+Values :
+0 - disable JIT kallsyms export (default value)
+1 - enable JIT kallsyms export for privileged users only
+
 dev_weight
 --------------
 
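As a quick way to verify the export once the sysctl is flipped to 1 (with bpf_jit_enable=1 and bpf_jit_harden=0), a small standalone checker can scan /proc/kallsyms for the bpf_prog_<tag> entries. This helper is illustrative only and not part of the patch:

#include <stdio.h>
#include <string.h>

/* Hypothetical userspace helper: list BPF JIT symbols exported via
 * /proc/kallsyms when bpf_jit_kallsyms=1. Matching entries look like:
 *   ffffffffa0575000 t bpf_prog_33c45a467c9e061a
 */
int main(void)
{
        char line[256];
        FILE *fp = fopen("/proc/kallsyms", "r");

        if (!fp) {
                perror("fopen");
                return 1;
        }
        while (fgets(line, sizeof(line), fp)) {
                if (strstr(line, " bpf_prog_"))
                        fputs(line, stdout);
        }
        fclose(fp);
        return 0;
}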

arch/arm64/net/bpf_jit_comp.c

Lines changed: 0 additions & 15 deletions
@@ -910,18 +910,3 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                                  tmp : orig_prog);
         return prog;
 }
-
-void bpf_jit_free(struct bpf_prog *prog)
-{
-        unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK;
-        struct bpf_binary_header *header = (void *)addr;
-
-        if (!prog->jited)
-                goto free_filter;
-
-        set_memory_rw(addr, header->pages);
-        bpf_jit_binary_free(header);
-
-free_filter:
-        bpf_prog_unlock_free(prog);
-}

arch/powerpc/net/bpf_jit_comp64.c

Lines changed: 1 addition & 0 deletions
@@ -1064,6 +1064,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
         return fp;
 }
 
+/* Overriding bpf_jit_free() as we don't set images read-only. */
 void bpf_jit_free(struct bpf_prog *fp)
 {
         unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;

arch/s390/net/bpf_jit_comp.c

Lines changed: 0 additions & 18 deletions
@@ -1339,21 +1339,3 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
                                  tmp : orig_fp);
         return fp;
 }
-
-/*
- * Free eBPF program
- */
-void bpf_jit_free(struct bpf_prog *fp)
-{
-        unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-        struct bpf_binary_header *header = (void *)addr;
-
-        if (!fp->jited)
-                goto free_filter;
-
-        set_memory_rw(addr, header->pages);
-        bpf_jit_binary_free(header);
-
-free_filter:
-        bpf_prog_unlock_free(fp);
-}

arch/x86/net/bpf_jit_comp.c

Lines changed: 0 additions & 15 deletions
@@ -1180,18 +1180,3 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                                  tmp : orig_prog);
         return prog;
 }
-
-void bpf_jit_free(struct bpf_prog *fp)
-{
-        unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
-        struct bpf_binary_header *header = (void *)addr;
-
-        if (!fp->jited)
-                goto free_filter;
-
-        set_memory_rw(addr, header->pages);
-        bpf_jit_binary_free(header);
-
-free_filter:
-        bpf_prog_unlock_free(fp);
-}
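The arm64, s390 and x86 copies of bpf_jit_free() removed above are folded into one generic helper in kernel/bpf/core.c (changed by this commit, but not shown in this excerpt); powerpc keeps its own override since it does not set images read-only. A sketch of the consolidated helper's likely shape, assuming the bpf_jit_binary_hdr() and bpf_jit_binary_unlock_ro() helpers added to include/linux/filter.h below:

/* Sketch of the generic free path: recover the binary header from the
 * page-aligned image start, make it writable again, and free it.
 * Archs with different requirements override this __weak symbol.
 */
void __weak bpf_jit_free(struct bpf_prog *fp)
{
        if (fp->jited) {
                struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);

                bpf_jit_binary_unlock_ro(hdr);
                bpf_jit_binary_free(hdr);
        }

        bpf_prog_unlock_free(fp);
}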

include/linux/bpf.h

Lines changed: 4 additions & 0 deletions
@@ -8,10 +8,12 @@
 #define _LINUX_BPF_H 1
 
 #include <uapi/linux/bpf.h>
+
 #include <linux/workqueue.h>
 #include <linux/file.h>
 #include <linux/percpu.h>
 #include <linux/err.h>
+#include <linux/rbtree_latch.h>
 
 struct perf_event;
 struct bpf_map;
@@ -177,6 +179,8 @@ struct bpf_prog_aux {
         atomic_t refcnt;
         u32 used_map_cnt;
         u32 max_ctx_offset;
+        struct latch_tree_node ksym_tnode;
+        struct list_head ksym_lnode;
         const struct bpf_verifier_ops *ops;
         struct bpf_map **used_maps;
         struct bpf_prog *prog;
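The two new fields back the kallsyms machinery in kernel/bpf/core.c (part of this commit, not shown here): ksym_tnode hangs the program into the latched RB tree for the RCU fast path, ksym_lnode into the RCU list for the seq-file slow path. A sketch of how latch_tree_ops can be keyed on a JIT image's address range so that latch_tree_find() resolves any address inside the image, not just its first byte; bpf_get_prog_addr_region() is an assumed helper returning the image's [start, end) bounds:

/* Sketch, modeled on the description in the commit message. */
static __always_inline unsigned long
bpf_get_prog_addr_start(struct latch_tree_node *n)
{
        const struct bpf_prog_aux *aux =
                container_of(n, struct bpf_prog_aux, ksym_tnode);

        return (unsigned long)aux->prog->bpf_func;
}

static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
                                          struct latch_tree_node *b)
{
        return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
}

static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
{
        unsigned long val = (unsigned long)key;
        unsigned long sym_start, sym_end;

        /* Assumed helper: compute the [start, end) bounds of the
         * JIT image behind @n.
         */
        bpf_get_prog_addr_region(
                container_of(n, struct bpf_prog_aux, ksym_tnode)->prog,
                &sym_start, &sym_end);
        if (val < sym_start)
                return -1;
        if (val >= sym_end)
                return  1;

        return 0;
}

static const struct latch_tree_ops bpf_tree_ops = {
        .less   = bpf_tree_less,
        .comp   = bpf_tree_comp,
};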

include/linux/filter.h

Lines changed: 111 additions & 1 deletion
@@ -54,6 +54,12 @@ struct bpf_prog_aux;
 #define BPF_REG_AX              MAX_BPF_REG
 #define MAX_BPF_JIT_REG         (MAX_BPF_REG + 1)
 
+/* As per nm, we expose JITed images as text (code) section for
+ * kallsyms. That way, tools like perf can find it to match
+ * addresses.
+ */
+#define BPF_SYM_ELF_TYPE        't'
+
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK   512
 
@@ -555,6 +561,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
         set_memory_rw((unsigned long)fp, fp->pages);
 }
+
+static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
+{
+        set_memory_rw((unsigned long)hdr, hdr->pages);
+}
 #else
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
@@ -563,8 +574,21 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
 }
+
+static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
+{
+}
 #endif /* CONFIG_DEBUG_SET_MODULE_RONX */
 
+static inline struct bpf_binary_header *
+bpf_jit_binary_hdr(const struct bpf_prog *fp)
+{
+        unsigned long real_start = (unsigned long)fp->bpf_func;
+        unsigned long addr = real_start & PAGE_MASK;
+
+        return (void *)addr;
+}
+
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
@@ -617,6 +641,7 @@ void bpf_warn_invalid_xdp_action(u32 act);
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
 extern int bpf_jit_harden;
+extern int bpf_jit_kallsyms;
 
 typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
 
@@ -651,6 +676,11 @@ static inline bool bpf_jit_is_ebpf(void)
 # endif
 }
 
+static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
+{
+        return fp->jited && bpf_jit_is_ebpf();
+}
+
 static inline bool bpf_jit_blinding_enabled(void)
 {
         /* These are the prerequisites, should someone ever have the
@@ -668,11 +698,91 @@ static inline bool bpf_jit_blinding_enabled(void)
 
         return true;
 }
-#else
+
+static inline bool bpf_jit_kallsyms_enabled(void)
+{
+        /* There are a couple of corner cases where kallsyms should
+         * not be enabled f.e. on hardening.
+         */
+        if (bpf_jit_harden)
+                return false;
+        if (!bpf_jit_kallsyms)
+                return false;
+        if (bpf_jit_kallsyms == 1)
+                return true;
+
+        return false;
+}
+
+const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+                                 unsigned long *off, char *sym);
+bool is_bpf_text_address(unsigned long addr);
+int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+                    char *sym);
+
+static inline const char *
+bpf_address_lookup(unsigned long addr, unsigned long *size,
+                   unsigned long *off, char **modname, char *sym)
+{
+        const char *ret = __bpf_address_lookup(addr, size, off, sym);
+
+        if (ret && modname)
+                *modname = NULL;
+        return ret;
+}
+
+void bpf_prog_kallsyms_add(struct bpf_prog *fp);
+void bpf_prog_kallsyms_del(struct bpf_prog *fp);
+
+#else /* CONFIG_BPF_JIT */
+
+static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
+{
+        return false;
+}
+
 static inline void bpf_jit_free(struct bpf_prog *fp)
 {
         bpf_prog_unlock_free(fp);
 }
+
+static inline bool bpf_jit_kallsyms_enabled(void)
+{
+        return false;
+}
+
+static inline const char *
+__bpf_address_lookup(unsigned long addr, unsigned long *size,
+                     unsigned long *off, char *sym)
+{
+        return NULL;
+}
+
+static inline bool is_bpf_text_address(unsigned long addr)
+{
+        return false;
+}
+
+static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
+                                  char *type, char *sym)
+{
+        return -ERANGE;
+}
+
+static inline const char *
+bpf_address_lookup(unsigned long addr, unsigned long *size,
+                   unsigned long *off, char **modname, char *sym)
+{
+        return NULL;
+}
+
+static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
+{
+}
+
+static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
+{
+}
 #endif /* CONFIG_BPF_JIT */
 
 #define BPF_ANC BIT(15)
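Finally, a sketch of how the bpf_prog_kallsyms_add()/bpf_prog_kallsyms_del() hooks declared above are plausibly gated and serialized in kernel/bpf/core.c (also part of this commit, not shown in this excerpt), reusing the bpf_tree_ops sketch from the include/linux/bpf.h section; bpf_lock, bpf_kallsyms and bpf_tree are assumed internals:

static DEFINE_SPINLOCK(bpf_lock);
static LIST_HEAD(bpf_kallsyms);
static struct latch_tree_root bpf_tree __cacheline_aligned;

/* Sketch: entries are only added for eBPF JITed progs, by privileged
 * users, and only while bpf_jit_kallsyms_enabled() holds, so hardening
 * keeps addresses hidden. The latch tree serves the RCU fast path;
 * the RCU list serves the /proc/kallsyms seq-file slow path.
 */
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
        if (!bpf_prog_ebpf_jited(fp) || !capable(CAP_SYS_ADMIN) ||
            !bpf_jit_kallsyms_enabled())
                return;

        spin_lock_bh(&bpf_lock);
        list_add_tail_rcu(&fp->aux->ksym_lnode, &bpf_kallsyms);
        latch_tree_insert(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
        spin_unlock_bh(&bpf_lock);
}

void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
        if (!bpf_prog_ebpf_jited(fp))
                return;

        spin_lock_bh(&bpf_lock);
        latch_tree_erase(&fp->aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
        list_del_rcu(&fp->aux->ksym_lnode);
        spin_unlock_bh(&bpf_lock);
}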
