Skip to content

Commit aa8a5e0

Browse files
committed
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the L1-D cache on exit from kernel to user mode, and from hypervisor to guest. This is known to be the case on at least Power7, Power8 and Power9. At this time we do not know the status of the vulnerability on other CPUs such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale CPUs. As more information comes to light we can enable this, or other mechanisms on those CPUs. The vulnerability occurs when the load of an architecturally inaccessible memory region (eg. userspace load of kernel memory) is speculatively executed to the point where its result can influence the address of a subsequent speculatively executed load. In order for that to happen, the first load must hit in the L1, because before the load is sent to the L2 the permission check is performed. Therefore if no kernel addresses hit in the L1 the vulnerability can not occur. We can ensure that is the case by flushing the L1 whenever we return to userspace. Similarly for hypervisor vs guest. In order to flush the L1-D cache on exit, we add a section of nops at each (h)rfi location that returns to a lower privileged context, and patch that with some sequence. Newer firmwares are able to advertise to us that there is a special nop instruction that flushes the L1-D. If we do not see that advertised, we fall back to doing a displacement flush in software. For guest kernels we support migration between some CPU versions, and different CPUs may use different flush instructions. So that we are prepared to migrate to a machine with a different flush instruction activated, we may have to patch more than one flush instruction at boot if the hypervisor tells us to. In the end this patch is mostly the work of Nicholas Piggin and Michael Ellerman. However a cast of thousands contributed to analysis of the issue, earlier versions of the patch, back ports testing etc. Many thanks to all of them. 
Tested-by: Jon Masters <[email protected]> Signed-off-by: Nicholas Piggin <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
1 parent c730564 commit aa8a5e0

File tree

9 files changed

+286
-8
lines changed

9 files changed

+286
-8
lines changed

arch/powerpc/include/asm/exception-64s.h

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,34 +74,58 @@
7474
*/
7575
#define EX_R3 EX_DAR
7676

77-
/* Macros for annotating the expected destination of (h)rfid */
77+
/*
78+
* Macros for annotating the expected destination of (h)rfid
79+
*
80+
* The nop instructions allow us to insert one or more instructions to flush the
81+
* L1-D cache when returning to userspace or a guest.
82+
*/
83+
#define RFI_FLUSH_SLOT \
84+
RFI_FLUSH_FIXUP_SECTION; \
85+
nop; \
86+
nop; \
87+
nop
7888

7989
#define RFI_TO_KERNEL \
8090
rfid
8191

8292
#define RFI_TO_USER \
83-
rfid
93+
RFI_FLUSH_SLOT; \
94+
rfid; \
95+
b rfi_flush_fallback
8496

8597
#define RFI_TO_USER_OR_KERNEL \
86-
rfid
98+
RFI_FLUSH_SLOT; \
99+
rfid; \
100+
b rfi_flush_fallback
87101

88102
#define RFI_TO_GUEST \
89-
rfid
103+
RFI_FLUSH_SLOT; \
104+
rfid; \
105+
b rfi_flush_fallback
90106

91107
#define HRFI_TO_KERNEL \
92108
hrfid
93109

94110
#define HRFI_TO_USER \
95-
hrfid
111+
RFI_FLUSH_SLOT; \
112+
hrfid; \
113+
b hrfi_flush_fallback
96114

97115
#define HRFI_TO_USER_OR_KERNEL \
98-
hrfid
116+
RFI_FLUSH_SLOT; \
117+
hrfid; \
118+
b hrfi_flush_fallback
99119

100120
#define HRFI_TO_GUEST \
101-
hrfid
121+
RFI_FLUSH_SLOT; \
122+
hrfid; \
123+
b hrfi_flush_fallback
102124

103125
#define HRFI_TO_UNKNOWN \
104-
hrfid
126+
RFI_FLUSH_SLOT; \
127+
hrfid; \
128+
b hrfi_flush_fallback
105129

106130
#ifdef CONFIG_RELOCATABLE
107131
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \

arch/powerpc/include/asm/feature-fixups.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,20 @@ label##3: \
187187
FTR_ENTRY_OFFSET label##1b-label##3b; \
188188
.popsection;
189189

190+
#define RFI_FLUSH_FIXUP_SECTION \
191+
951: \
192+
.pushsection __rfi_flush_fixup,"a"; \
193+
.align 2; \
194+
952: \
195+
FTR_ENTRY_OFFSET 951b-952b; \
196+
.popsection;
197+
198+
190199
#ifndef __ASSEMBLY__
200+
#include <linux/types.h>
201+
202+
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
203+
191204
void apply_feature_fixups(void);
192205
void setup_feature_keys(void);
193206
#endif

arch/powerpc/include/asm/paca.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,16 @@ struct paca_struct {
232232
struct sibling_subcore_state *sibling_subcore_state;
233233
#endif
234234
#endif
235+
#ifdef CONFIG_PPC_BOOK3S_64
236+
/*
237+
* rfi fallback flush must be in its own cacheline to prevent
238+
* other paca data leaking into the L1d
239+
*/
240+
u64 exrfi[EX_SIZE] __aligned(0x80);
241+
void *rfi_flush_fallback_area;
242+
u64 l1d_flush_congruence;
243+
u64 l1d_flush_sets;
244+
#endif
235245
};
236246

237247
extern void copy_mm_to_paca(struct mm_struct *mm);

arch/powerpc/include/asm/setup.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
3939
static inline void pseries_little_endian_exceptions(void) {}
4040
#endif /* CONFIG_PPC_PSERIES */
4141

42+
void rfi_flush_enable(bool enable);
43+
44+
/* These are bit flags */
45+
enum l1d_flush_type {
46+
L1D_FLUSH_NONE = 0x1,
47+
L1D_FLUSH_FALLBACK = 0x2,
48+
L1D_FLUSH_ORI = 0x4,
49+
L1D_FLUSH_MTTRIG = 0x8,
50+
};
51+
52+
void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
53+
void do_rfi_flush_fixups(enum l1d_flush_type types);
54+
4255
#endif /* !__ASSEMBLY__ */
4356

4457
#endif /* _ASM_POWERPC_SETUP_H */

arch/powerpc/kernel/asm-offsets.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,11 @@ int main(void)
237237
OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
238238
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
239239
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
240+
OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
241+
OFFSET(PACA_EXRFI, paca_struct, exrfi);
242+
OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
243+
OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
244+
240245
#endif
241246
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
242247
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);

arch/powerpc/kernel/exceptions-64s.S

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1449,6 +1449,90 @@ masked_##_H##interrupt: \
14491449
b .; \
14501450
MASKED_DEC_HANDLER(_H)
14511451

1452+
TRAMP_REAL_BEGIN(rfi_flush_fallback)
1453+
SET_SCRATCH0(r13);
1454+
GET_PACA(r13);
1455+
std r9,PACA_EXRFI+EX_R9(r13)
1456+
std r10,PACA_EXRFI+EX_R10(r13)
1457+
std r11,PACA_EXRFI+EX_R11(r13)
1458+
std r12,PACA_EXRFI+EX_R12(r13)
1459+
std r8,PACA_EXRFI+EX_R13(r13)
1460+
mfctr r9
1461+
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1462+
ld r11,PACA_L1D_FLUSH_SETS(r13)
1463+
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1464+
/*
1465+
 * The load addresses are at staggered offsets within cachelines,
1466+
* which suits some pipelines better (on others it should not
1467+
* hurt).
1468+
*/
1469+
addi r12,r12,8
1470+
mtctr r11
1471+
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1472+
1473+
/* order ld/st prior to dcbt stop all streams with flushing */
1474+
sync
1475+
1: li r8,0
1476+
.rept 8 /* 8-way set associative */
1477+
ldx r11,r10,r8
1478+
add r8,r8,r12
1479+
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1480+
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1481+
.endr
1482+
addi r10,r10,128 /* 128 byte cache line */
1483+
bdnz 1b
1484+
1485+
mtctr r9
1486+
ld r9,PACA_EXRFI+EX_R9(r13)
1487+
ld r10,PACA_EXRFI+EX_R10(r13)
1488+
ld r11,PACA_EXRFI+EX_R11(r13)
1489+
ld r12,PACA_EXRFI+EX_R12(r13)
1490+
ld r8,PACA_EXRFI+EX_R13(r13)
1491+
GET_SCRATCH0(r13);
1492+
rfid
1493+
1494+
TRAMP_REAL_BEGIN(hrfi_flush_fallback)
1495+
SET_SCRATCH0(r13);
1496+
GET_PACA(r13);
1497+
std r9,PACA_EXRFI+EX_R9(r13)
1498+
std r10,PACA_EXRFI+EX_R10(r13)
1499+
std r11,PACA_EXRFI+EX_R11(r13)
1500+
std r12,PACA_EXRFI+EX_R12(r13)
1501+
std r8,PACA_EXRFI+EX_R13(r13)
1502+
mfctr r9
1503+
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1504+
ld r11,PACA_L1D_FLUSH_SETS(r13)
1505+
ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1506+
/*
1507+
 * The load addresses are at staggered offsets within cachelines,
1508+
* which suits some pipelines better (on others it should not
1509+
* hurt).
1510+
*/
1511+
addi r12,r12,8
1512+
mtctr r11
1513+
DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1514+
1515+
/* order ld/st prior to dcbt stop all streams with flushing */
1516+
sync
1517+
1: li r8,0
1518+
.rept 8 /* 8-way set associative */
1519+
ldx r11,r10,r8
1520+
add r8,r8,r12
1521+
xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1522+
add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1523+
.endr
1524+
addi r10,r10,128 /* 128 byte cache line */
1525+
bdnz 1b
1526+
1527+
mtctr r9
1528+
ld r9,PACA_EXRFI+EX_R9(r13)
1529+
ld r10,PACA_EXRFI+EX_R10(r13)
1530+
ld r11,PACA_EXRFI+EX_R11(r13)
1531+
ld r12,PACA_EXRFI+EX_R12(r13)
1532+
ld r8,PACA_EXRFI+EX_R13(r13)
1533+
GET_SCRATCH0(r13);
1534+
hrfid
1535+
14521536
/*
14531537
* Real mode exceptions actually use this too, but alternate
14541538
* instruction code patches (which end up in the common .text area)

arch/powerpc/kernel/setup_64.c

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,3 +801,82 @@ static int __init disable_hardlockup_detector(void)
801801
return 0;
802802
}
803803
early_initcall(disable_hardlockup_detector);
804+
805+
#ifdef CONFIG_PPC_BOOK3S_64
806+
static enum l1d_flush_type enabled_flush_types;
807+
static void *l1d_flush_fallback_area;
808+
bool rfi_flush;
809+
810+
static void do_nothing(void *unused)
811+
{
812+
/*
813+
* We don't need to do the flush explicitly, just enter+exit kernel is
814+
* sufficient, the RFI exit handlers will do the right thing.
815+
*/
816+
}
817+
818+
void rfi_flush_enable(bool enable)
819+
{
820+
if (rfi_flush == enable)
821+
return;
822+
823+
if (enable) {
824+
do_rfi_flush_fixups(enabled_flush_types);
825+
on_each_cpu(do_nothing, NULL, 1);
826+
} else
827+
do_rfi_flush_fixups(L1D_FLUSH_NONE);
828+
829+
rfi_flush = enable;
830+
}
831+
832+
static void init_fallback_flush(void)
833+
{
834+
u64 l1d_size, limit;
835+
int cpu;
836+
837+
l1d_size = ppc64_caches.l1d.size;
838+
limit = min(safe_stack_limit(), ppc64_rma_size);
839+
840+
/*
841+
* Align to L1d size, and size it at 2x L1d size, to catch possible
842+
* hardware prefetch runoff. We don't have a recipe for load patterns to
843+
* reliably avoid the prefetcher.
844+
*/
845+
l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
846+
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
847+
848+
for_each_possible_cpu(cpu) {
849+
/*
850+
* The fallback flush is currently coded for 8-way
851+
* associativity. Different associativity is possible, but it
852+
* will be treated as 8-way and may not evict the lines as
853+
* effectively.
854+
*
855+
* 128 byte lines are mandatory.
856+
*/
857+
u64 c = l1d_size / 8;
858+
859+
paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
860+
paca[cpu].l1d_flush_congruence = c;
861+
paca[cpu].l1d_flush_sets = c / 128;
862+
}
863+
}
864+
865+
void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
866+
{
867+
if (types & L1D_FLUSH_FALLBACK) {
868+
pr_info("rfi-flush: Using fallback displacement flush\n");
869+
init_fallback_flush();
870+
}
871+
872+
if (types & L1D_FLUSH_ORI)
873+
pr_info("rfi-flush: Using ori type flush\n");
874+
875+
if (types & L1D_FLUSH_MTTRIG)
876+
pr_info("rfi-flush: Using mttrig type flush\n");
877+
878+
enabled_flush_types = types;
879+
880+
rfi_flush_enable(enable);
881+
}
882+
#endif /* CONFIG_PPC_BOOK3S_64 */

arch/powerpc/kernel/vmlinux.lds.S

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,15 @@ SECTIONS
132132
/* Read-only data */
133133
RO_DATA(PAGE_SIZE)
134134

135+
#ifdef CONFIG_PPC64
136+
. = ALIGN(8);
137+
__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
138+
__start___rfi_flush_fixup = .;
139+
*(__rfi_flush_fixup)
140+
__stop___rfi_flush_fixup = .;
141+
}
142+
#endif
143+
135144
EXCEPTION_TABLE(0)
136145

137146
NOTES :kernel :notes

arch/powerpc/lib/feature-fixups.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
116116
}
117117
}
118118

119+
#ifdef CONFIG_PPC_BOOK3S_64
120+
void do_rfi_flush_fixups(enum l1d_flush_type types)
121+
{
122+
unsigned int instrs[3], *dest;
123+
long *start, *end;
124+
int i;
125+
126+
start = PTRRELOC(&__start___rfi_flush_fixup),
127+
end = PTRRELOC(&__stop___rfi_flush_fixup);
128+
129+
instrs[0] = 0x60000000; /* nop */
130+
instrs[1] = 0x60000000; /* nop */
131+
instrs[2] = 0x60000000; /* nop */
132+
133+
if (types & L1D_FLUSH_FALLBACK)
134+
/* b .+16 to fallback flush */
135+
instrs[0] = 0x48000010;
136+
137+
i = 0;
138+
if (types & L1D_FLUSH_ORI) {
139+
instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
140+
instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
141+
}
142+
143+
if (types & L1D_FLUSH_MTTRIG)
144+
instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
145+
146+
for (i = 0; start < end; start++, i++) {
147+
dest = (void *)start + *start;
148+
149+
pr_devel("patching dest %lx\n", (unsigned long)dest);
150+
151+
patch_instruction(dest, instrs[0]);
152+
patch_instruction(dest + 1, instrs[1]);
153+
patch_instruction(dest + 2, instrs[2]);
154+
}
155+
156+
printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
157+
}
158+
#endif /* CONFIG_PPC_BOOK3S_64 */
159+
119160
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
120161
{
121162
long *start, *end;

0 commit comments

Comments
 (0)