Skip to content

Commit e9f02a8

Browse files
author
Alexei Starovoitov
committed
Merge branch 'trampoline-fixes'
Jiri Olsa says: ==================== hi, sending 2 fixes to fix kernel support for loading trampoline programs in bcc/bpftrace and allow to unwind through trampoline/dispatcher. Original rfc post [1]. Speedup output of perf bench while running klockstat.py on kprobes vs trampolines: Without: $ perf bench sched messaging -l 50000 ... Total time: 18.571 [sec] With current kprobe tracing: $ perf bench sched messaging -l 50000 ... Total time: 183.395 [sec] With kfunc tracing: $ perf bench sched messaging -l 50000 ... Total time: 39.773 [sec] v4 changes: - rebased on latest bpf-next/master - removed image tree mutex and use trampoline_mutex instead - checking directly for string pointer in patch 1 [Alexei] - skipped helpers patches, as they are no longer needed [Alexei] v3 changes: - added ack from John Fastabend for patch 1 - move out is_bpf_image_address from is_bpf_text_address call [David] v2 changes: - make the unwind work for dispatcher as well - added test for allowed trampolines count - used raw tp pt_regs nest-arrays for trampoline helpers thanks, jirka [1] https://lore.kernel.org/netdev/[email protected]/ ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 35b9211 + d633d57 commit e9f02a8

File tree

7 files changed

+239
-13
lines changed

7 files changed

+239
-13
lines changed

include/linux/bpf.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
525525
int bpf_trampoline_link_prog(struct bpf_prog *prog);
526526
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
527527
void bpf_trampoline_put(struct bpf_trampoline *tr);
528-
void *bpf_jit_alloc_exec_page(void);
529528
#define BPF_DISPATCHER_INIT(name) { \
530529
.mutex = __MUTEX_INITIALIZER(name.mutex), \
531530
.func = &name##func, \
@@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
557556
#define BPF_DISPATCHER_PTR(name) (&name)
558557
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
559558
struct bpf_prog *to);
559+
/*
 * Header laid over the start of an image obtained from
 * bpf_jit_alloc_exec_page(). __bpf_image_alloc() hands callers a pointer to
 * @data, and bpf_trampoline_put() uses container_of() on that pointer to get
 * back to this header.
 */
struct bpf_image {
560+
/* links the image into image_tree for address lookups */
struct latch_tree_node tnode;
561+
/* start of the area returned to callers of bpf_image_alloc() */
unsigned char data[];
562+
};
563+
#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
564+
bool is_bpf_image_address(unsigned long address);
565+
void *bpf_image_alloc(void);
560566
#else
561567
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
562568
{
@@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
578584
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
579585
struct bpf_prog *from,
580586
struct bpf_prog *to) {}
587+
/*
 * Stub on the #else side of the surrounding conditional: always reports that
 * @address is not inside a BPF image.
 */
static inline bool is_bpf_image_address(unsigned long address)
588+
{
589+
return false;
590+
}
581591
#endif
582592

583593
struct bpf_func_info_aux {

kernel/bpf/btf.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
36693669
}
36703670
}
36713671

3672+
static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
3673+
{
3674+
/* t comes in already as a pointer */
3675+
t = btf_type_by_id(btf, t->type);
3676+
3677+
/* allow const */
3678+
if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
3679+
t = btf_type_by_id(btf, t->type);
3680+
3681+
/* char, signed char, unsigned char */
3682+
return btf_type_is_int(t) && t->size == 1;
3683+
}
3684+
36723685
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
36733686
const struct bpf_prog *prog,
36743687
struct bpf_insn_access_aux *info)
@@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
37353748
*/
37363749
return true;
37373750

3751+
if (is_string_ptr(btf, t))
3752+
return true;
3753+
37383754
/* this is a pointer to another type */
37393755
info->reg_type = PTR_TO_BTF_ID;
37403756

kernel/bpf/dispatcher.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
113113
noff = 0;
114114
} else {
115115
old = d->image + d->image_off;
116-
noff = d->image_off ^ (PAGE_SIZE / 2);
116+
noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
117117
}
118118

119119
new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
140140

141141
mutex_lock(&d->mutex);
142142
if (!d->image) {
143-
d->image = bpf_jit_alloc_exec_page();
143+
d->image = bpf_image_alloc();
144144
if (!d->image)
145145
goto out;
146146
}

kernel/bpf/trampoline.c

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <linux/bpf.h>
55
#include <linux/filter.h>
66
#include <linux/ftrace.h>
7+
#include <linux/rbtree_latch.h>
78

89
/* dummy _ops. The verifier will operate on target program's ops. */
910
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
1617
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
1718

1819
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
20+
static struct latch_tree_root image_tree __cacheline_aligned;
1921

20-
/* serializes access to trampoline_table */
22+
/* serializes access to trampoline_table and image_tree */
2123
static DEFINE_MUTEX(trampoline_mutex);
2224

23-
void *bpf_jit_alloc_exec_page(void)
25+
static void *bpf_jit_alloc_exec_page(void)
2426
{
2527
void *image;
2628

@@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
3638
return image;
3739
}
3840

41+
/* Ordering callback for image_tree: images sort by their own address. */
static __always_inline bool image_tree_less(struct latch_tree_node *a,
					    struct latch_tree_node *b)
{
	return container_of(a, struct bpf_image, tnode) <
	       container_of(b, struct bpf_image, tnode);
}
49+
50+
static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
51+
{
52+
void *image = container_of(n, struct bpf_image, tnode);
53+
54+
if (addr < image)
55+
return -1;
56+
if (addr >= image + PAGE_SIZE)
57+
return 1;
58+
59+
return 0;
60+
}
61+
62+
/* Callbacks passed to latch_tree_insert/erase/find for image_tree. */
static const struct latch_tree_ops image_tree_ops = {
63+
.less = image_tree_less,
64+
.comp = image_tree_comp,
65+
};
66+
67+
/*
 * Obtain an image from bpf_jit_alloc_exec_page(), insert it into image_tree
 * and return the address of its data area. Returns NULL when the allocation
 * fails.
 *
 * @lock: take trampoline_mutex around the tree insertion. Callers pass false
 *        presumably because they already hold the mutex (see
 *        bpf_trampoline_lookup()).
 */
static void *__bpf_image_alloc(bool lock)
{
	struct bpf_image *img = bpf_jit_alloc_exec_page();

	if (!img)
		return NULL;

	if (lock) {
		mutex_lock(&trampoline_mutex);
		latch_tree_insert(&img->tnode, &image_tree, &image_tree_ops);
		mutex_unlock(&trampoline_mutex);
	} else {
		latch_tree_insert(&img->tnode, &image_tree, &image_tree_ops);
	}

	return img->data;
}
82+
83+
/*
 * Allocate a BPF image and register it in image_tree, taking
 * trampoline_mutex for the insertion. Returns the image's data area, or NULL
 * on allocation failure.
 */
void *bpf_image_alloc(void)
84+
{
85+
return __bpf_image_alloc(true);
86+
}
87+
88+
bool is_bpf_image_address(unsigned long addr)
89+
{
90+
bool ret;
91+
92+
rcu_read_lock();
93+
ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
94+
rcu_read_unlock();
95+
96+
return ret;
97+
}
98+
3999
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
40100
{
41101
struct bpf_trampoline *tr;
@@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
56116
goto out;
57117

58118
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
59-
image = bpf_jit_alloc_exec_page();
119+
image = __bpf_image_alloc(false);
60120
if (!image) {
61121
kfree(tr);
62122
tr = NULL;
@@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
131191
}
132192

133193
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
134-
* bytes on x86. Pick a number to fit into PAGE_SIZE / 2
194+
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
135195
*/
136196
#define BPF_MAX_TRAMP_PROGS 40
137197

138198
static int bpf_trampoline_update(struct bpf_trampoline *tr)
139199
{
140-
void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
141-
void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
200+
void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
201+
void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
142202
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
143203
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
144204
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
174234
*/
175235
synchronize_rcu_tasks();
176236

177-
err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
237+
err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
178238
&tr->func.model, flags,
179239
fentry, fentry_cnt,
180240
fexit, fexit_cnt,
@@ -284,6 +344,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
284344

285345
void bpf_trampoline_put(struct bpf_trampoline *tr)
286346
{
347+
struct bpf_image *image;
348+
287349
if (!tr)
288350
return;
289351
mutex_lock(&trampoline_mutex);
@@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
294356
goto out;
295357
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
296358
goto out;
359+
image = container_of(tr->image, struct bpf_image, data);
360+
latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
297361
/* wait for tasks to get out of trampoline before freeing it */
298362
synchronize_rcu_tasks();
299-
bpf_jit_free_exec(tr->image);
363+
bpf_jit_free_exec(image);
300364
hlist_del(&tr->hlist);
301365
kfree(tr);
302366
out:

kernel/extable.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
131131
* triggers a stack trace, or a WARN() that happens during
132132
* coming back from idle, or cpu on or offlining.
133133
*
134-
* is_module_text_address() as well as the kprobe slots
135-
* and is_bpf_text_address() require RCU to be watching.
134+
* is_module_text_address() as well as the kprobe slots,
135+
* is_bpf_text_address() and is_bpf_image_address() require
136+
* RCU to be watching.
136137
*/
137138
no_rcu = !rcu_is_watching();
138139

@@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
148149
goto out;
149150
if (is_bpf_text_address(addr))
150151
goto out;
152+
if (is_bpf_image_address(addr))
153+
goto out;
151154
ret = 0;
152155
out:
153156
if (no_rcu)
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
#define _GNU_SOURCE
3+
#include <sched.h>
4+
#include <sys/prctl.h>
5+
#include <test_progs.h>
6+
7+
#define MAX_TRAMP_PROGS 40
8+
9+
/*
 * Per-iteration state of test_trampoline_count(): one loaded object and the
 * link of whichever program (fentry or fexit) was attached from it; the other
 * link stays NULL.
 */
struct inst {
10+
struct bpf_object *obj;
11+
struct bpf_link *link_fentry;
12+
struct bpf_link *link_fexit;
13+
};
14+
15+
/*
 * Rename the current task by writing to /proc/self/comm.
 * Returns 0 on success, -1 if the file cannot be opened or written.
 */
static int test_task_rename(void)
{
	char buf[] = "test_overhead";
	int duration = 0;	/* presumably referenced by the CHECK() macro */
	int fd, err, ret = 0;

	fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
	if (CHECK(fd < 0, "open /proc", "err %d", errno))
		return -1;

	err = write(fd, buf, sizeof(buf));
	if (err < 0) {
		/* report the failure while errno is still the one from write() */
		CHECK(err < 0, "task rename", "err %d", errno);
		ret = -1;
	}

	close(fd);
	return ret;
}
32+
33+
static struct bpf_link *load(struct bpf_object *obj, const char *name)
34+
{
35+
struct bpf_program *prog;
36+
int duration = 0;
37+
38+
prog = bpf_object__find_program_by_title(obj, name);
39+
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
40+
return ERR_PTR(-EINVAL);
41+
return bpf_program__attach_trace(prog);
42+
}
43+
44+
void test_trampoline_count(void)
45+
{
46+
const char *fentry_name = "fentry/__set_task_comm";
47+
const char *fexit_name = "fexit/__set_task_comm";
48+
const char *object = "test_trampoline_count.o";
49+
struct inst inst[MAX_TRAMP_PROGS] = { 0 };
50+
int err, i = 0, duration = 0;
51+
struct bpf_object *obj;
52+
struct bpf_link *link;
53+
char comm[16] = {};
54+
55+
/* attach 'allowed' 40 trampoline programs */
56+
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
57+
obj = bpf_object__open_file(object, NULL);
58+
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
59+
goto cleanup;
60+
61+
err = bpf_object__load(obj);
62+
if (CHECK(err, "obj_load", "err %d\n", err))
63+
goto cleanup;
64+
inst[i].obj = obj;
65+
66+
if (rand() % 2) {
67+
link = load(obj, fentry_name);
68+
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
69+
goto cleanup;
70+
inst[i].link_fentry = link;
71+
} else {
72+
link = load(obj, fexit_name);
73+
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
74+
goto cleanup;
75+
inst[i].link_fexit = link;
76+
}
77+
}
78+
79+
/* and try 1 extra.. */
80+
obj = bpf_object__open_file(object, NULL);
81+
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
82+
goto cleanup;
83+
84+
err = bpf_object__load(obj);
85+
if (CHECK(err, "obj_load", "err %d\n", err))
86+
goto cleanup_extra;
87+
88+
/* ..that needs to fail */
89+
link = load(obj, fentry_name);
90+
if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
91+
bpf_link__destroy(link);
92+
goto cleanup_extra;
93+
}
94+
95+
/* with E2BIG error */
96+
CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
97+
98+
/* and finaly execute the probe */
99+
if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
100+
goto cleanup_extra;
101+
CHECK_FAIL(test_task_rename());
102+
CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
103+
104+
cleanup_extra:
105+
bpf_object__close(obj);
106+
cleanup:
107+
while (--i) {
108+
bpf_link__destroy(inst[i].link_fentry);
109+
bpf_link__destroy(inst[i].link_fexit);
110+
bpf_object__close(inst[i].obj);
111+
}
112+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include <stdbool.h>
3+
#include <stddef.h>
4+
#include <linux/bpf.h>
5+
#include "bpf_trace_helpers.h"
6+
7+
struct task_struct;
8+
9+
/*
 * Empty fentry program for __set_task_comm: it does nothing but return 0.
 */
SEC("fentry/__set_task_comm")
10+
int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
11+
{
12+
return 0;
13+
}
14+
15+
/*
 * Empty fexit program for __set_task_comm: it does nothing but return 0.
 */
SEC("fexit/__set_task_comm")
16+
int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
17+
{
18+
return 0;
19+
}
20+
21+
char _license[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)