
Commit e9b4e60

Jiri Olsa authored and Alexei Starovoitov committed
bpf: Allow to resolve bpf trampoline and dispatcher in unwind
When unwinding the stack we need to identify each address to
successfully continue. Add a latch tree to keep trampolines for quick
lookup during the unwind.

The patch uses the first 48 bytes of the image page for the latch tree
node, leaving 4048 bytes of the rest of the page for the trampoline or
dispatcher generated code. That is still enough not to affect the
maximum counts of trampoline and dispatcher programs.

Signed-off-by: Jiri Olsa <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent 84ad7a7 commit e9b4e60

File tree

4 files changed: +90, -13 lines
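Before the per-file diffs, a minimal user-space sketch of the size arithmetic from the commit message. The structs here are simplified stand-ins for the kernel's rb_node and latch_tree_node (assuming a 64-bit build and 4096-byte pages), not the real headers:

#include <stdio.h>

#define PAGE_SIZE 4096

/* Stand-in for the kernel's struct rb_node: three pointer-sized words. */
struct rb_node_sketch {
        unsigned long parent_color;
        void *rb_right, *rb_left;
};

/* Stand-in for struct latch_tree_node: one rb_node per latch copy. */
struct latch_tree_node_sketch {
        struct rb_node_sketch node[2];
};

/* Mirrors the struct bpf_image added in include/linux/bpf.h below. */
struct bpf_image_sketch {
        struct latch_tree_node_sketch tnode;
        unsigned char data[];   /* trampoline or dispatcher code */
};

int main(void)
{
        size_t node = sizeof(struct bpf_image_sketch);

        /* Expect "48" and "4048" on a 64-bit build:
         * 2 rb_nodes * 3 words * 8 bytes = 48; 4096 - 48 = 4048.
         */
        printf("tnode: %zu bytes, image data: %zu bytes\n",
               node, (size_t)PAGE_SIZE - node);
        return 0;
}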

include/linux/bpf.h

Lines changed: 11 additions & 1 deletion

@@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_INIT(name) { \
         .mutex = __MUTEX_INITIALIZER(name.mutex), \
         .func = &name##func, \
@@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_PTR(name) (&name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
                                 struct bpf_prog *to);
+struct bpf_image {
+        struct latch_tree_node tnode;
+        unsigned char data[];
+};
+#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
+bool is_bpf_image_address(unsigned long address);
+void *bpf_image_alloc(void);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
@@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
                                               struct bpf_prog *from,
                                               struct bpf_prog *to) {}
+static inline bool is_bpf_image_address(unsigned long address)
+{
+        return false;
+}
 #endif
 
 struct bpf_func_info_aux {

kernel/bpf/dispatcher.c

Lines changed: 2 additions & 2 deletions

@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
                 noff = 0;
         } else {
                 old = d->image + d->image_off;
-                noff = d->image_off ^ (PAGE_SIZE / 2);
+                noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
         }
 
         new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 
         mutex_lock(&d->mutex);
         if (!d->image) {
-                d->image = bpf_jit_alloc_exec_page();
+                d->image = bpf_image_alloc();
                 if (!d->image)
                         goto out;
         }
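
The noff hunk above is the subtle one: the dispatcher keeps two generations of generated code in the two halves of its image and XORs the offset to flip between them, so once 48 bytes go to the latch tree node the halves must be computed from BPF_IMAGE_SIZE rather than PAGE_SIZE. A stand-alone sketch of that flip, with the 4048-byte size assumed from the commit message:

#include <stdio.h>

#define BPF_IMAGE_SIZE 4048     /* PAGE_SIZE - sizeof(struct bpf_image), assumed */

int main(void)
{
        unsigned int image_off = 0;

        /* image_off only ever holds 0 or BPF_IMAGE_SIZE / 2, so the XOR
         * flips between the two halves: 0 -> 2024 -> 0 -> ...
         */
        for (int gen = 0; gen < 4; gen++) {
                printf("generation %d written at offset %u\n", gen, image_off);
                image_off ^= BPF_IMAGE_SIZE / 2;
        }
        return 0;
}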

kernel/bpf/trampoline.c

Lines changed: 72 additions & 8 deletions

@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ftrace.h>
+#include <linux/rbtree_latch.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
 
 static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+static struct latch_tree_root image_tree __cacheline_aligned;
 
-/* serializes access to trampoline_table */
+/* serializes access to trampoline_table and image_tree */
 static DEFINE_MUTEX(trampoline_mutex);
 
-void *bpf_jit_alloc_exec_page(void)
+static void *bpf_jit_alloc_exec_page(void)
 {
         void *image;
 
@@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
         return image;
 }
 
+static __always_inline bool image_tree_less(struct latch_tree_node *a,
+                                            struct latch_tree_node *b)
+{
+        struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
+        struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
+
+        return ia < ib;
+}
+
+static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+{
+        void *image = container_of(n, struct bpf_image, tnode);
+
+        if (addr < image)
+                return -1;
+        if (addr >= image + PAGE_SIZE)
+                return 1;
+
+        return 0;
+}
+
+static const struct latch_tree_ops image_tree_ops = {
+        .less = image_tree_less,
+        .comp = image_tree_comp,
+};
+
+static void *__bpf_image_alloc(bool lock)
+{
+        struct bpf_image *image;
+
+        image = bpf_jit_alloc_exec_page();
+        if (!image)
+                return NULL;
+
+        if (lock)
+                mutex_lock(&trampoline_mutex);
+        latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
+        if (lock)
+                mutex_unlock(&trampoline_mutex);
+        return image->data;
+}
+
+void *bpf_image_alloc(void)
+{
+        return __bpf_image_alloc(true);
+}
+
+bool is_bpf_image_address(unsigned long addr)
+{
+        bool ret;
+
+        rcu_read_lock();
+        ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
+        rcu_read_unlock();
+
+        return ret;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
         struct bpf_trampoline *tr;
@@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
                 goto out;
 
         /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-        image = bpf_jit_alloc_exec_page();
+        image = __bpf_image_alloc(false);
         if (!image) {
                 kfree(tr);
                 tr = NULL;
@@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
 #define BPF_MAX_TRAMP_PROGS 40
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-        void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-        void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+        void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
+        void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
         struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
         int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
         int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
          */
         synchronize_rcu_tasks();
 
-        err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+        err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
                                           &tr->func.model, flags,
                                           fentry, fentry_cnt,
                                           fexit, fexit_cnt,
@@ -284,6 +344,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
 
 void bpf_trampoline_put(struct bpf_trampoline *tr)
 {
+        struct bpf_image *image;
+
         if (!tr)
                 return;
         mutex_lock(&trampoline_mutex);
@@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
                 goto out;
         if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
                 goto out;
+        image = container_of(tr->image, struct bpf_image, data);
+        latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
         /* wait for tasks to get out of trampoline before freeing it */
         synchronize_rcu_tasks();
-        bpf_jit_free_exec(tr->image);
+        bpf_jit_free_exec(image);
         hlist_del(&tr->hlist);
         kfree(tr);
 out:
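
One detail worth noting in the block above: image_tree_comp() keys the latch tree by address range, reporting whether an address falls below, inside, or past a given image page; that three-way contract is what lets latch_tree_find() map an arbitrary unwind address back to its containing image. A minimal user-space sketch of the same contract (plain pointers, no latch tree):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096

/* Same -1/0/1 contract as image_tree_comp(): is addr below, inside,
 * or past the page starting at image?
 */
static int image_comp(uintptr_t addr, uintptr_t image)
{
        if (addr < image)
                return -1;
        if (addr >= image + PAGE_SIZE)
                return 1;
        return 0;
}

int main(void)
{
        static unsigned char page[PAGE_SIZE];
        uintptr_t base = (uintptr_t)page;

        printf("%d %d %d\n",
               image_comp(base - 1, base),          /* -1: below the page */
               image_comp(base + 100, base),        /*  0: inside, a hit  */
               image_comp(base + PAGE_SIZE, base)); /*  1: past the end   */
        return 0;
}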

kernel/extable.c

Lines changed: 5 additions & 2 deletions

@@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
          * triggers a stack trace, or a WARN() that happens during
          * coming back from idle, or cpu on or offlining.
          *
-         * is_module_text_address() as well as the kprobe slots
-         * and is_bpf_text_address() require RCU to be watching.
+         * is_module_text_address() as well as the kprobe slots,
+         * is_bpf_text_address() and is_bpf_image_address require
+         * RCU to be watching.
          */
         no_rcu = !rcu_is_watching();
 
@@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
                 goto out;
         if (is_bpf_text_address(addr))
                 goto out;
+        if (is_bpf_image_address(addr))
+                goto out;
         ret = 0;
 out:
         if (no_rcu)
