Skip to content

Commit 177366b

Browse files
4ast authored and davem330 committed
bpf: change x86 JITed program stack layout
In order to JIT programs with different stack sizes, we need to make the epilogue and the exception path independent of the stack size; hence, move the auxiliary stack space from the bottom of the stack to the top of the stack. A nice side effect is that the JITed function prologue becomes shorter due to imm8 offset encoding vs imm32. Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: Daniel Borkmann <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent b870aa9 commit 177366b

File tree

2 files changed

+40
-38
lines changed

2 files changed

+40
-38
lines changed

arch/x86/net/bpf_jit.S

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
*/
2020
#define SKBDATA %r10
2121
#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
22-
#define MAX_BPF_STACK (512 /* from filter.h */ + \
23-
32 /* space for rbx,r13,r14,r15 */ + \
24-
8 /* space for skb_copy_bits */)
2522

2623
#define FUNC(name) \
2724
.globl name; \
@@ -66,7 +63,7 @@ FUNC(sk_load_byte_positive_offset)
6663

6764
/* rsi contains offset and can be scratched */
6865
#define bpf_slow_path_common(LEN) \
69-
lea -MAX_BPF_STACK + 32(%rbp), %rdx;\
66+
lea 32(%rbp), %rdx;\
7067
FRAME_BEGIN; \
7168
mov %rbx, %rdi; /* arg1 == skb */ \
7269
push %r9; \
@@ -83,22 +80,22 @@ FUNC(sk_load_byte_positive_offset)
8380
bpf_slow_path_word:
8481
bpf_slow_path_common(4)
8582
js bpf_error
86-
mov - MAX_BPF_STACK + 32(%rbp),%eax
83+
mov 32(%rbp),%eax
8784
bswap %eax
8885
ret
8986

9087
bpf_slow_path_half:
9188
bpf_slow_path_common(2)
9289
js bpf_error
93-
mov - MAX_BPF_STACK + 32(%rbp),%ax
90+
mov 32(%rbp),%ax
9491
rol $8,%ax
9592
movzwl %ax,%eax
9693
ret
9794

9895
bpf_slow_path_byte:
9996
bpf_slow_path_common(1)
10097
js bpf_error
101-
movzbl - MAX_BPF_STACK + 32(%rbp),%eax
98+
movzbl 32(%rbp),%eax
10299
ret
103100

104101
#define sk_negative_common(SIZE) \
@@ -148,9 +145,10 @@ FUNC(sk_load_byte_negative_offset)
148145
bpf_error:
149146
# force a return 0 from jit handler
150147
xor %eax,%eax
151-
mov - MAX_BPF_STACK(%rbp),%rbx
152-
mov - MAX_BPF_STACK + 8(%rbp),%r13
153-
mov - MAX_BPF_STACK + 16(%rbp),%r14
154-
mov - MAX_BPF_STACK + 24(%rbp),%r15
148+
mov (%rbp),%rbx
149+
mov 8(%rbp),%r13
150+
mov 16(%rbp),%r14
151+
mov 24(%rbp),%r15
152+
add $40, %rbp
155153
leaveq
156154
ret

arch/x86/net/bpf_jit_comp.c

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -197,12 +197,11 @@ struct jit_context {
197197
#define BPF_MAX_INSN_SIZE 128
198198
#define BPF_INSN_SAFETY 64
199199

200-
#define STACKSIZE \
201-
(MAX_BPF_STACK + \
202-
32 /* space for rbx, r13, r14, r15 */ + \
200+
#define AUX_STACK_SPACE \
201+
(32 /* space for rbx, r13, r14, r15 */ + \
203202
8 /* space for skb_copy_bits() buffer */)
204203

205-
#define PROLOGUE_SIZE 48
204+
#define PROLOGUE_SIZE 37
206205

207206
/* emit x64 prologue code for BPF program and check its size.
208207
* bpf_tail_call helper will skip it while jumping into another program
@@ -215,13 +214,16 @@ static void emit_prologue(u8 **pprog)
215214
EMIT1(0x55); /* push rbp */
216215
EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
217216

218-
/* sub rsp, STACKSIZE */
219-
EMIT3_off32(0x48, 0x81, 0xEC, STACKSIZE);
217+
/* sub rsp, MAX_BPF_STACK + AUX_STACK_SPACE */
218+
EMIT3_off32(0x48, 0x81, 0xEC, MAX_BPF_STACK + AUX_STACK_SPACE);
219+
220+
/* sub rbp, AUX_STACK_SPACE */
221+
EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
220222

221223
/* all classic BPF filters use R6(rbx) save it */
222224

223-
/* mov qword ptr [rbp-X],rbx */
224-
EMIT3_off32(0x48, 0x89, 0x9D, -STACKSIZE);
225+
/* mov qword ptr [rbp+0],rbx */
226+
EMIT4(0x48, 0x89, 0x5D, 0);
225227

226228
/* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
227229
* as temporary, so all tcpdump filters need to spill/fill R7(r13) and
@@ -231,12 +233,12 @@ static void emit_prologue(u8 **pprog)
231233
* than synthetic ones. Therefore not worth adding complexity.
232234
*/
233235

234-
/* mov qword ptr [rbp-X],r13 */
235-
EMIT3_off32(0x4C, 0x89, 0xAD, -STACKSIZE + 8);
236-
/* mov qword ptr [rbp-X],r14 */
237-
EMIT3_off32(0x4C, 0x89, 0xB5, -STACKSIZE + 16);
238-
/* mov qword ptr [rbp-X],r15 */
239-
EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24);
236+
/* mov qword ptr [rbp+8],r13 */
237+
EMIT4(0x4C, 0x89, 0x6D, 8);
238+
/* mov qword ptr [rbp+16],r14 */
239+
EMIT4(0x4C, 0x89, 0x75, 16);
240+
/* mov qword ptr [rbp+24],r15 */
241+
EMIT4(0x4C, 0x89, 0x7D, 24);
240242

241243
/* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
242244
* we need to reset the counter to 0. It's done in two instructions,
@@ -246,8 +248,8 @@ static void emit_prologue(u8 **pprog)
246248

247249
/* xor eax, eax */
248250
EMIT2(0x31, 0xc0);
249-
/* mov qword ptr [rbp-X], rax */
250-
EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32);
251+
/* mov qword ptr [rbp+32], rax */
252+
EMIT4(0x48, 0x89, 0x45, 32);
251253

252254
BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
253255
*pprog = prog;
@@ -289,13 +291,13 @@ static void emit_bpf_tail_call(u8 **pprog)
289291
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
290292
* goto out;
291293
*/
292-
EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
294+
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
293295
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
294296
#define OFFSET2 36
295297
EMIT2(X86_JA, OFFSET2); /* ja out */
296298
label2 = cnt;
297299
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
298-
EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
300+
EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */
299301

300302
/* prog = array->ptrs[index]; */
301303
EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
@@ -1036,15 +1038,17 @@ xadd: if (is_imm8(insn->off))
10361038
seen_exit = true;
10371039
/* update cleanup_addr */
10381040
ctx->cleanup_addr = proglen;
1039-
/* mov rbx, qword ptr [rbp-X] */
1040-
EMIT3_off32(0x48, 0x8B, 0x9D, -STACKSIZE);
1041-
/* mov r13, qword ptr [rbp-X] */
1042-
EMIT3_off32(0x4C, 0x8B, 0xAD, -STACKSIZE + 8);
1043-
/* mov r14, qword ptr [rbp-X] */
1044-
EMIT3_off32(0x4C, 0x8B, 0xB5, -STACKSIZE + 16);
1045-
/* mov r15, qword ptr [rbp-X] */
1046-
EMIT3_off32(0x4C, 0x8B, 0xBD, -STACKSIZE + 24);
1047-
1041+
/* mov rbx, qword ptr [rbp+0] */
1042+
EMIT4(0x48, 0x8B, 0x5D, 0);
1043+
/* mov r13, qword ptr [rbp+8] */
1044+
EMIT4(0x4C, 0x8B, 0x6D, 8);
1045+
/* mov r14, qword ptr [rbp+16] */
1046+
EMIT4(0x4C, 0x8B, 0x75, 16);
1047+
/* mov r15, qword ptr [rbp+24] */
1048+
EMIT4(0x4C, 0x8B, 0x7D, 24);
1049+
1050+
/* add rbp, AUX_STACK_SPACE */
1051+
EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
10481052
EMIT1(0xC9); /* leave */
10491053
EMIT1(0xC3); /* ret */
10501054
break;

0 commit comments

Comments
 (0)