Skip to content

Commit 75ccbef

Browse files
Björn TöpelAlexei Starovoitov
authored andcommitted
bpf: Introduce BPF dispatcher
The BPF dispatcher is a multi-way branch code generator, mainly targeted for XDP programs. When an XDP program is executed via bpf_prog_run_xdp(), it is invoked via an indirect call. The indirect call has a substantial performance impact when retpolines are enabled. The dispatcher transforms indirect calls to direct calls, and therefore avoids the retpoline. The dispatcher is generated using the BPF JIT, and relies on text poking provided by bpf_arch_text_poke(). The dispatcher hijacks a trampoline function via its __fentry__ nop. One dispatcher instance currently supports up to 64 dispatch points. A user creates a dispatcher with its corresponding trampoline with the DEFINE_BPF_DISPATCHER macro. Signed-off-by: Björn Töpel <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 98e8627 commit 75ccbef

File tree

4 files changed

+337
-0
lines changed

4 files changed

+337
-0
lines changed

arch/x86/net/bpf_jit_comp.c

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
#include <linux/if_vlan.h>
1111
#include <linux/bpf.h>
1212
#include <linux/memory.h>
13+
#include <linux/sort.h>
1314
#include <asm/extable.h>
1415
#include <asm/set_memory.h>
1516
#include <asm/nospec-branch.h>
1617
#include <asm/text-patching.h>
18+
#include <asm/asm-prototypes.h>
1719

1820
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
1921
{
@@ -1530,6 +1532,126 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
15301532
return 0;
15311533
}
15321534

1535+
/* Emit a near (rel32) conditional jump to @func at location @ip.
 *
 * @pprog:    in/out cursor into the JIT image; advanced past the emitted
 *            instruction on success
 * @func:     jump target
 * @ip:       address the instruction will execute from (used to compute
 *            the relative displacement)
 * @jmp_cond: short-form Jcc opcode (e.g. X86_JE); adding 0x10 converts it
 *            to the second opcode byte of the two-byte 0x0F near form
 *
 * Returns 0 on success, -EINVAL if the target is not reachable with a
 * signed 32-bit displacement.
 */
static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
	u8 *prog = *pprog;
	int cnt = 0;
	s64 offset;

	/* Displacement is relative to the end of the 6-byte instruction:
	 * 2 opcode bytes (0x0F, Jcc) + 4 bytes of rel32.
	 */
	offset = func - (ip + 2 + 4);
	if (!is_simm32(offset)) {
		pr_err("Target %p is out of range\n", func);
		return -EINVAL;
	}
	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
	*pprog = prog;
	return 0;
}
1550+
1551+
/* Emit the dispatcher's fallback path: an indirect jump through rdx
 * (BPF_REG_3, which holds the bpf_func pointer) taken when no dispatch
 * slot matched. With retpolines enabled, jump to the compiler-provided
 * external thunk instead of emitting a raw indirect jump.
 *
 * Returns 0 on success, or the error from emit_jump().
 */
static int emit_fallback_jump(u8 **pprog)
{
	u8 *prog = *pprog;
	int err = 0;

#ifdef CONFIG_RETPOLINE
	/* Note that this assumes the compiler uses external
	 * thunks for indirect calls. Both clang and GCC use the same
	 * naming convention for external thunks.
	 */
	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
#else
	int cnt = 0;

	EMIT2(0xFF, 0xE2); /* jmp rdx */
#endif
	*pprog = prog;
	return err;
}
1570+
1571+
/* Recursively emit a binary search over the sorted program addresses
 * progs[a..b], comparing each pivot against rdx (BPF_REG_3, the bpf_func
 * pointer passed to the trampoline). A leaf emits "je <prog>" for an
 * exact match followed by the indirect fallback; interior nodes emit a
 * compare plus a forward "jg" into the upper half, whose displacement is
 * back-patched once the lower half's size is known.
 *
 * @pprog: in/out cursor into the JIT image
 * @a, @b: inclusive index range into @progs (callers must ensure a <= b)
 * @progs: sorted array of program addresses; each must fit in simm32
 *
 * Returns 0 on success, negative on error.
 */
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
	int pivot, err, jg_bytes = 1, cnt = 0;
	u8 *jg_reloc, *prog = *pprog;
	s64 jg_offset;

	if (a == b) {
		/* Leaf node of recursion, i.e. not a range of indices
		 * anymore.
		 */
		EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
		if (!is_simm32(progs[a]))
			return -1;
		EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
			    progs[a]);
		err = emit_cond_near_jump(&prog,	/* je func */
					  (void *)progs[a], prog,
					  X86_JE);
		if (err)
			return err;

		/* No exact match: fall through to the indirect call. */
		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
		if (err)
			return err;

		*pprog = prog;
		return 0;
	}

	/* Not a leaf node, so we pivot, and recursively descend into
	 * the lower and upper ranges.
	 */
	pivot = (b - a) / 2;
	EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
	if (!is_simm32(progs[a + pivot]))
		return -1;
	EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);

	if (pivot > 2) {			/* jg upper_part */
		/* Require near jump: a deep upper subtree may not be
		 * reachable with a 1-byte (rel8) displacement.
		 */
		jg_bytes = 4;
		EMIT2_off32(0x0F, X86_JG + 0x10, 0);
	} else {
		EMIT2(X86_JG, 0);
	}
	/* Remember where the (still zero) jg displacement ends, so it can
	 * be patched after the lower half is emitted.
	 */
	jg_reloc = prog;

	err = emit_bpf_dispatcher(&prog, a, a + pivot,	/* emit lower_part */
				  progs);
	if (err)
		return err;

	/* Back-patch the jg displacement to skip over the lower half. */
	jg_offset = prog - jg_reloc;
	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);

	err = emit_bpf_dispatcher(&prog, a + pivot + 1,	/* emit upper_part */
				  b, progs);
	if (err)
		return err;

	*pprog = prog;
	return 0;
}
1634+
1635+
static int cmp_ips(const void *a, const void *b)
1636+
{
1637+
const s64 *ipa = a;
1638+
const s64 *ipb = b;
1639+
1640+
if (*ipa > *ipb)
1641+
return 1;
1642+
if (*ipa < *ipb)
1643+
return -1;
1644+
return 0;
1645+
}
1646+
1647+
/* x86-64 arch hook: JIT the dispatcher's multi-way branch into @image.
 *
 * @image:     writable executable page half to emit into
 * @funcs:     array of program entry addresses; sorted in place here
 * @num_funcs: number of valid entries in @funcs
 *
 * Returns 0 on success, negative on error.
 *
 * Guard against an empty or oversized set: with num_funcs == 0 the old
 * code called emit_bpf_dispatcher(&prog, 0, -1, funcs), and since
 * a != b the pivot computation would read progs[0] out of bounds.
 */
int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
{
	u8 *prog = image;

	if (num_funcs < 1 || num_funcs > BPF_DISPATCHER_MAX)
		return -EINVAL;

	/* Sorted input lets the emitted code binary-search on rdx. */
	sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
}
1654+
15331655
struct x64_jit_data {
15341656
struct bpf_binary_header *header;
15351657
int *addrs;

include/linux/bpf.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,12 +470,61 @@ struct bpf_trampoline {
470470
void *image;
471471
u64 selector;
472472
};
473+
474+
/* Upper bound on programs per dispatcher; chosen so the generated
 * branch tree fits in half an image page.
 */
#define BPF_DISPATCHER_MAX 64 /* Fits in 2048B */

/* One dispatch slot: a registered program plus how many clients
 * (e.g. XDP attach points) currently route through it.
 */
struct bpf_dispatcher_prog {
	struct bpf_prog *prog;
	refcount_t users;
};

/* A multi-way-branch dispatcher instance. Created statically via
 * DEFINE_BPF_DISPATCHER(); its trampoline's __fentry__ nop is text-poked
 * to jump into the JITed image.
 */
struct bpf_dispatcher {
	/* dispatcher mutex */
	struct mutex mutex;
	void *func;		/* the trampoline function (poke target) */
	struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX];
	int num_progs;		/* occupied slots in progs[] */
	void *image;		/* executable page; used in two halves */
	u32 image_off;		/* offset of the currently live half */
};
490+
473491
#ifdef CONFIG_BPF_JIT
474492
struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
475493
int bpf_trampoline_link_prog(struct bpf_prog *prog);
476494
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
477495
void bpf_trampoline_put(struct bpf_trampoline *tr);
478496
void *bpf_jit_alloc_exec_page(void);
497+
/* Static initializer for a struct bpf_dispatcher; func points at the
 * trampoline defined by DEFINE_BPF_DISPATCHER(name) below.
 */
#define BPF_DISPATCHER_INIT(name) {			\
	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
	.func = &name##func,				\
	.progs = {},					\
	.num_progs = 0,					\
	.image = NULL,					\
	.image_off = 0					\
}

/* Define a dispatcher and its trampoline. The trampoline must stay
 * out-of-line (noinline) so its __fentry__ nop exists as a stable poke
 * target; until patched it simply performs the indirect call itself.
 */
#define DEFINE_BPF_DISPATCHER(name)					\
	noinline unsigned int name##func(				\
		const void *ctx,					\
		const struct bpf_insn *insnsi,				\
		unsigned int (*bpf_func)(const void *,			\
					 const struct bpf_insn *))	\
	{								\
		return bpf_func(ctx, insnsi);				\
	}								\
	EXPORT_SYMBOL(name##func);					\
	struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
/* Matching extern declarations for use from other translation units. */
#define DECLARE_BPF_DISPATCHER(name)					\
	unsigned int name##func(					\
		const void *ctx,					\
		const struct bpf_insn *insnsi,				\
		unsigned int (*bpf_func)(const void *,			\
					 const struct bpf_insn *));	\
	extern struct bpf_dispatcher name;
#define BPF_DISPATCHER_FUNC(name) name##func
#define BPF_DISPATCHER_PTR(name) (&name)
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
				struct bpf_prog *to);
479528
#else
480529
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
481530
{
@@ -490,6 +539,13 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
490539
return -ENOTSUPP;
491540
}
492541
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
542+
/* !CONFIG_BPF_JIT: dispatchers compile away; callers fall back to the
 * plain indirect-call path (bpf_dispatcher_nopfunc).
 */
#define DEFINE_BPF_DISPATCHER(name)
#define DECLARE_BPF_DISPATCHER(name)
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
#define BPF_DISPATCHER_PTR(name) NULL
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
					      struct bpf_prog *from,
					      struct bpf_prog *to) {}
493549
#endif
494550

495551
struct bpf_func_info_aux {

kernel/bpf/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
88
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
99
obj-$(CONFIG_BPF_JIT) += trampoline.o
1010
obj-$(CONFIG_BPF_SYSCALL) += btf.o
11+
obj-$(CONFIG_BPF_JIT) += dispatcher.o
1112
ifeq ($(CONFIG_NET),y)
1213
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
1314
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o

kernel/bpf/dispatcher.c

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/* Copyright(c) 2019 Intel Corporation. */
3+
4+
#include <linux/hash.h>
5+
#include <linux/bpf.h>
6+
#include <linux/filter.h>
7+
8+
/* The BPF dispatcher is a multiway branch code generator. The
9+
* dispatcher is a mechanism to avoid the performance penalty of an
10+
* indirect call, which is expensive when retpolines are enabled. A
11+
* dispatch client registers a BPF program into the dispatcher, and if
12+
* there is available room in the dispatcher a direct call to the BPF
13+
* program will be generated. All calls to the BPF programs called via
14+
* the dispatcher will then be a direct call, instead of an
15+
 * indirect. The dispatcher hijacks a trampoline function via the
16+
* __fentry__ of the trampoline. The trampoline function has the
17+
* following signature:
18+
*
19+
* unsigned int trampoline(const void *ctx, const struct bpf_insn *insnsi,
20+
* unsigned int (*bpf_func)(const void *,
21+
* const struct bpf_insn *));
22+
*/
23+
24+
static struct bpf_dispatcher_prog *bpf_dispatcher_find_prog(
25+
struct bpf_dispatcher *d, struct bpf_prog *prog)
26+
{
27+
int i;
28+
29+
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
30+
if (prog == d->progs[i].prog)
31+
return &d->progs[i];
32+
}
33+
return NULL;
34+
}
35+
36+
static struct bpf_dispatcher_prog *bpf_dispatcher_find_free(
37+
struct bpf_dispatcher *d)
38+
{
39+
return bpf_dispatcher_find_prog(d, NULL);
40+
}
41+
42+
static bool bpf_dispatcher_add_prog(struct bpf_dispatcher *d,
43+
struct bpf_prog *prog)
44+
{
45+
struct bpf_dispatcher_prog *entry;
46+
47+
if (!prog)
48+
return false;
49+
50+
entry = bpf_dispatcher_find_prog(d, prog);
51+
if (entry) {
52+
refcount_inc(&entry->users);
53+
return false;
54+
}
55+
56+
entry = bpf_dispatcher_find_free(d);
57+
if (!entry)
58+
return false;
59+
60+
bpf_prog_inc(prog);
61+
entry->prog = prog;
62+
refcount_set(&entry->users, 1);
63+
d->num_progs++;
64+
return true;
65+
}
66+
67+
static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d,
68+
struct bpf_prog *prog)
69+
{
70+
struct bpf_dispatcher_prog *entry;
71+
72+
if (!prog)
73+
return false;
74+
75+
entry = bpf_dispatcher_find_prog(d, prog);
76+
if (!entry)
77+
return false;
78+
79+
if (refcount_dec_and_test(&entry->users)) {
80+
entry->prog = NULL;
81+
bpf_prog_put(prog);
82+
d->num_progs--;
83+
return true;
84+
}
85+
return false;
86+
}
87+
88+
/* Weak default for architectures without dispatcher JIT support;
 * overridden by the arch implementation (e.g. x86-64 bpf_jit_comp.c).
 */
int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
{
	return -ENOTSUPP;
}
92+
93+
static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image)
94+
{
95+
s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
96+
int i;
97+
98+
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
99+
if (d->progs[i].prog)
100+
*ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func;
101+
}
102+
return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs);
103+
}
104+
105+
/* Regenerate the dispatcher code and switch the trampoline over to it.
 *
 * The image page is used in an A/B fashion: the live PAGE_SIZE/2 half
 * is never modified; the new dispatch code is JITed into the other
 * half, and only then is the trampoline's __fentry__ text-poked to
 * jump there. @prev_num_progs tells us whether a jump was installed
 * before (old target), and d->num_progs whether one is wanted now
 * (new target, NULL removes the jump). Caller holds d->mutex.
 */
static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
{
	void *old, *new;
	u32 noff;
	int err;

	if (!prev_num_progs) {
		/* No jump installed yet; emit into the first half. */
		old = NULL;
		noff = 0;
	} else {
		old = d->image + d->image_off;
		/* Flip to the other half of the image page. */
		noff = d->image_off ^ (PAGE_SIZE / 2);
	}

	new = d->num_progs ? d->image + noff : NULL;
	if (new) {
		if (bpf_dispatcher_prepare(d, new))
			return;
	}

	err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new);
	if (err || !new)
		return;

	/* Poke succeeded: the freshly written half is now live. */
	d->image_off = noff;
}
131+
132+
/* Replace program @from with @to in dispatcher @d; either may be NULL
 * (NULL @from: pure add; NULL @to: pure remove). The dispatcher image
 * page is allocated lazily on first use. The trampoline is re-poked
 * only when the set of dispatched programs actually changed.
 */
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
				struct bpf_prog *to)
{
	bool changed = false;
	int prev_num_progs;

	if (from == to)
		return;

	mutex_lock(&d->mutex);
	if (!d->image) {
		/* First use: allocate the executable image page. */
		d->image = bpf_jit_alloc_exec_page();
		if (!d->image)
			goto out;
	}

	/* Snapshot before mutation so update() knows whether a jump
	 * was previously installed.
	 */
	prev_num_progs = d->num_progs;
	changed |= bpf_dispatcher_remove_prog(d, from);
	changed |= bpf_dispatcher_add_prog(d, to);

	if (!changed)
		goto out;

	bpf_dispatcher_update(d, prev_num_progs);
out:
	mutex_unlock(&d->mutex);
}

0 commit comments

Comments
 (0)