
Commit d1c9910

dwmw2 authored and Ingo Molnar committed
Revert "x86/retpoline: Simplify vmexit_fill_RSB()"
This reverts commit 1dde741. By putting the RSB filling out of line and calling it, we waste one RSB slot for returning from the function itself, which means one fewer actual function call we can make if we're doing the Skylake abomination of call-depth counting. It also changed the number of RSB stuffings we do on vmexit from 32, which was correct, to 16. Let's just stop with the bikeshedding; it didn't actually *fix* anything anyway.

Signed-off-by: David Woodhouse <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 8554004 commit d1c9910
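For context, the __FILL_RETURN_BUFFER macro restored by this revert (see the asm/nospec-branch.h hunk below) open-codes the Google-recommended two-calls-per-iteration stuffing loop at the call site, so no RSB slot is spent on returning from a helper. The following is a minimal, hypothetical user-space sketch of that sequence, not part of this commit; it assumes x86-64 and GCC-style inline asm, and should be built with -mno-red-zone since the calls store below the caller's stack pointer.

#include <stdio.h>

/*
 * Stuff the return stack buffer with 32 entries: each loop iteration makes
 * two calls whose pushed return addresses point at 'pause; lfence; jmp'
 * speculation traps that are never architecturally executed; the 32 pushed
 * return addresses are then discarded with one stack-pointer adjustment.
 */
static void fill_rsb_sketch(void)
{
	unsigned long loops;

	asm volatile(
		"	mov	$16, %0\n"		/* 32 entries, 2 calls per iteration */
		"771:\n"
		"	call	772f\n"
		"773:	pause\n"			/* speculation trap */
		"	lfence\n"
		"	jmp	773b\n"
		"772:\n"
		"	call	774f\n"
		"775:	pause\n"			/* speculation trap */
		"	lfence\n"
		"	jmp	775b\n"
		"774:\n"
		"	dec	%0\n"
		"	jnz	771b\n"
		"	add	$(8 * 32), %%rsp\n"	/* drop the 32 pushed return addresses */
		: "=r" (loops)
		:
		: "memory");
	(void)loops;
}

int main(void)
{
	fill_rsb_sketch();	/* architecturally a no-op apart from the RSB */
	puts("RSB stuffing loop executed");
	return 0;
}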

File tree

6 files changed: +65 -71 lines changed


arch/x86/entry/entry_32.S

Lines changed: 1 addition & 2 deletions
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %ebx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */

arch/x86/entry/entry_64.S

Lines changed: 1 addition & 2 deletions
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %rbx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */

arch/x86/include/asm/asm-prototypes.h

Lines changed: 0 additions & 3 deletions
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */

arch/x86/include/asm/nospec-branch.h

Lines changed: 63 additions & 7 deletions
@@ -8,6 +8,50 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;				\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -78,10 +122,17 @@
 #endif
 .endm
 
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+/*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-	ALTERNATIVE "", "call __clear_rsb", \ftr
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
+		\ftr
+.Lskip_rsb_\@:
 #endif
 .endm
 
@@ -156,10 +207,15 @@ extern char __indirect_thunk_end[];
 static inline void vmexit_fill_RSB(void)
 {
 #ifdef CONFIG_RETPOLINE
-	alternative_input("",
-			  "call __fill_rsb",
-			  X86_FEATURE_RETPOLINE,
-			  ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+	unsigned long loops;
+
+	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+		      ALTERNATIVE("jmp 910f",
+				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+				  X86_FEATURE_RETPOLINE)
+		      "910:"
+		      : "=r" (loops), ASM_CALL_CONSTRAINT
+		      : : "memory" );
 #endif
 }
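The __stringify(__FILL_RETURN_BUFFER(...)) usage above is what lets one CPP macro serve both .S files and C inline asm: the macro expands to a multi-instruction sequence, which is then turned into a single asm template string. A small, self-contained sketch of that pattern follows; the __ADD_THRICE and add_thrice names are made up for illustration, and only the double-expansion __stringify matches the kernel's <linux/stringify.h>.

#include <stdio.h>

/* Double-expansion stringify, as in the kernel's <linux/stringify.h>:
 * the outer macro forces its argument to be macro-expanded before the
 * inner #x turns the expansion into a string literal. */
#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

/* A multi-instruction asm sequence written once as a CPP macro... */
#define __ADD_THRICE(reg)	\
	add	$1, reg;	\
	add	$1, reg;	\
	add	$1, reg;

/* ...and stringified into inline asm from C; a .S file could emit the
 * same macro via an assembler .macro wrapper instead. */
static unsigned long add_thrice(unsigned long x)
{
	asm(__stringify(__ADD_THRICE(%0)) : "+r" (x));
	return x;
}

int main(void)
{
	printf("%lu\n", add_thrice(39));	/* prints 42 */
	return 0;
}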

arch/x86/lib/Makefile

Lines changed: 0 additions & 1 deletion
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o

arch/x86/lib/retpoline.S

Lines changed: 0 additions & 56 deletions
@@ -7,7 +7,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
 
 .macro THUNK reg
 	.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
 GENERATE_THUNK(r14)
 GENERATE_THUNK(r15)
 #endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
-	mov	$(\nr / 2), %_ASM_BX
-	.align 16
-771:
-	call	772f
-773:	/* speculation trap */
-	pause
-	lfence
-	jmp	773b
-	.align 16
-772:
-	call	774f
-775:	/* speculation trap */
-	pause
-	lfence
-	jmp	775b
-	.align 16
-774:
-	dec	%_ASM_BX
-	jnz	771b
-	add	$((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS		16	/* To avoid underflow */
-
-ENTRY(__fill_rsb)
-	STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
-	ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
-	STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
-	ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
