Skip to content

Commit 047e657

Browse files
kvaneeshmpe
authored and committed
powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
On POWER9, under some circumstances, a broadcast TLB invalidation will fail to invalidate the ERAT cache on some threads when there are parallel mtpidr/mtlpidr happening on other threads of the same core. This can cause stores to continue to go to a page after it's unmapped. The workaround is to force an ERAT flush using PID=0 or LPID=0 tlbie flush. This additional TLB flush will cause the ERAT cache invalidation. Since we are using PID=0 or LPID=0, we don't get filtered out by the TLB snoop filtering logic. We need to still follow this up with another tlbie to take care of store vs tlbie ordering issue explained in commit: a5d4b58 ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9"). The presence of ERAT cache implies we can still get new stores and they may miss store queue marking flush. Cc: [email protected] Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 09ce98c commit 047e657

File tree

5 files changed

+134
-22
lines changed

5 files changed

+134
-22
lines changed

arch/powerpc/include/asm/cputable.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ static inline void cpu_feature_keys_init(void) { }
211211
#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
212212
#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
213213
#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
214+
#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
214215

215216
#ifndef __ASSEMBLY__
216217

@@ -457,7 +458,7 @@ static inline void cpu_feature_keys_init(void) { }
457458
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
458459
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
459460
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
460-
CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR)
461+
CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
461462
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
462463
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
463464
#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \

arch/powerpc/kernel/dt_cpu_ftrs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,8 @@ static __init void update_tlbie_feature_flag(unsigned long pvr)
715715
WARN_ONCE(1, "Unknown PVR");
716716
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
717717
}
718+
719+
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
718720
}
719721
}
720722

arch/powerpc/kvm/book3s_hv_rm_mmu.c

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
433433
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
434434
}
435435

436+
static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
437+
{
438+
439+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
440+
/* Radix flush for a hash guest */
441+
442+
unsigned long rb,rs,prs,r,ric;
443+
444+
rb = PPC_BIT(52); /* IS = 2 */
445+
rs = 0; /* lpid = 0 */
446+
prs = 0; /* partition scoped */
447+
r = 1; /* radix format */
448+
ric = 0; /* RIC_FLSUH_TLB */
449+
450+
/*
451+
* Need the extra ptesync to make sure we don't
452+
* re-order the tlbie
453+
*/
454+
asm volatile("ptesync": : :"memory");
455+
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
456+
: : "r"(rb), "i"(r), "i"(prs),
457+
"i"(ric), "r"(rs) : "memory");
458+
}
459+
460+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
461+
asm volatile("ptesync": : :"memory");
462+
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
463+
"r" (rb_value), "r" (lpid));
464+
}
465+
}
466+
436467
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
437468
long npages, int global, bool need_sync)
438469
{
@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
451482
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
452483
}
453484

454-
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
455-
/*
456-
* Need the extra ptesync to make sure we don't
457-
* re-order the tlbie
458-
*/
459-
asm volatile("ptesync": : :"memory");
460-
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
461-
"r" (rbvalues[0]), "r" (kvm->arch.lpid));
462-
}
463-
485+
fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
464486
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
465487
} else {
466488
if (need_sync)

arch/powerpc/mm/book3s64/hash_native.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,31 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
197197
return va;
198198
}
199199

200-
static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
200+
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
201+
int apsize, int ssize)
201202
{
203+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
204+
/* Radix flush for a hash guest */
205+
206+
unsigned long rb,rs,prs,r,ric;
207+
208+
rb = PPC_BIT(52); /* IS = 2 */
209+
rs = 0; /* lpid = 0 */
210+
prs = 0; /* partition scoped */
211+
r = 1; /* radix format */
212+
ric = 0; /* RIC_FLUSH_TLB */
213+
214+
/*
215+
* Need the extra ptesync to make sure we don't
216+
* re-order the tlbie
217+
*/
218+
asm volatile("ptesync": : :"memory");
219+
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
220+
: : "r"(rb), "i"(r), "i"(prs),
221+
"i"(ric), "r"(rs) : "memory");
222+
}
223+
224+
202225
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
203226
/* Need the extra ptesync to ensure we don't reorder tlbie*/
204227
asm volatile("ptesync": : :"memory");
@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
283306
asm volatile("ptesync": : :"memory");
284307
} else {
285308
__tlbie(vpn, psize, apsize, ssize);
286-
fixup_tlbie(vpn, psize, apsize, ssize);
309+
fixup_tlbie_vpn(vpn, psize, apsize, ssize);
287310
asm volatile("eieio; tlbsync; ptesync": : :"memory");
288311
}
289312
if (lock_tlbie && !use_local)
@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
856879
/*
857880
* Just do one more with the last used values.
858881
*/
859-
fixup_tlbie(vpn, psize, psize, ssize);
882+
fixup_tlbie_vpn(vpn, psize, psize, ssize);
860883
asm volatile("eieio; tlbsync; ptesync":::"memory");
861884

862885
if (lock_tlbie)

arch/powerpc/mm/book3s64/radix_tlb.c

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -196,21 +196,82 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
196196
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
197197
}
198198

199-
static inline void fixup_tlbie(void)
199+
200+
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
201+
unsigned long ap)
202+
{
203+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
204+
asm volatile("ptesync": : :"memory");
205+
__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
206+
}
207+
208+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
209+
asm volatile("ptesync": : :"memory");
210+
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
211+
}
212+
}
213+
214+
static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
215+
unsigned long ap)
216+
{
217+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
218+
asm volatile("ptesync": : :"memory");
219+
__tlbie_pid(0, RIC_FLUSH_TLB);
220+
}
221+
222+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
223+
asm volatile("ptesync": : :"memory");
224+
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
225+
}
226+
}
227+
228+
static inline void fixup_tlbie_pid(unsigned long pid)
200229
{
201-
unsigned long pid = 0;
230+
/*
231+
* We can use any address for the invalidation, pick one which is
232+
* probably unused as an optimisation.
233+
*/
202234
unsigned long va = ((1UL << 52) - 1);
203235

236+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
237+
asm volatile("ptesync": : :"memory");
238+
__tlbie_pid(0, RIC_FLUSH_TLB);
239+
}
240+
204241
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
205242
asm volatile("ptesync": : :"memory");
206243
__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
207244
}
208245
}
209246

247+
248+
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
249+
unsigned long ap)
250+
{
251+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
252+
asm volatile("ptesync": : :"memory");
253+
__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
254+
}
255+
256+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
257+
asm volatile("ptesync": : :"memory");
258+
__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
259+
}
260+
}
261+
210262
static inline void fixup_tlbie_lpid(unsigned long lpid)
211263
{
264+
/*
265+
* We can use any address for the invalidation, pick one which is
266+
* probably unused as an optimisation.
267+
*/
212268
unsigned long va = ((1UL << 52) - 1);
213269

270+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
271+
asm volatile("ptesync": : :"memory");
272+
__tlbie_lpid(0, RIC_FLUSH_TLB);
273+
}
274+
214275
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
215276
asm volatile("ptesync": : :"memory");
216277
__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
@@ -258,15 +319,16 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
258319
switch (ric) {
259320
case RIC_FLUSH_TLB:
260321
__tlbie_pid(pid, RIC_FLUSH_TLB);
322+
fixup_tlbie_pid(pid);
261323
break;
262324
case RIC_FLUSH_PWC:
263325
__tlbie_pid(pid, RIC_FLUSH_PWC);
264326
break;
265327
case RIC_FLUSH_ALL:
266328
default:
267329
__tlbie_pid(pid, RIC_FLUSH_ALL);
330+
fixup_tlbie_pid(pid);
268331
}
269-
fixup_tlbie();
270332
asm volatile("eieio; tlbsync; ptesync": : :"memory");
271333
}
272334

@@ -315,15 +377,16 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
315377
switch (ric) {
316378
case RIC_FLUSH_TLB:
317379
__tlbie_lpid(lpid, RIC_FLUSH_TLB);
380+
fixup_tlbie_lpid(lpid);
318381
break;
319382
case RIC_FLUSH_PWC:
320383
__tlbie_lpid(lpid, RIC_FLUSH_PWC);
321384
break;
322385
case RIC_FLUSH_ALL:
323386
default:
324387
__tlbie_lpid(lpid, RIC_FLUSH_ALL);
388+
fixup_tlbie_lpid(lpid);
325389
}
326-
fixup_tlbie_lpid(lpid);
327390
asm volatile("eieio; tlbsync; ptesync": : :"memory");
328391
}
329392

@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
390453

391454
for (addr = start; addr < end; addr += page_size)
392455
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
456+
457+
fixup_tlbie_va_range(addr - page_size, pid, ap);
393458
}
394459

395460
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
399464

400465
asm volatile("ptesync": : :"memory");
401466
__tlbie_va(va, pid, ap, ric);
402-
fixup_tlbie();
467+
fixup_tlbie_va(va, pid, ap);
403468
asm volatile("eieio; tlbsync; ptesync": : :"memory");
404469
}
405470

@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
457522

458523
asm volatile("ptesync": : :"memory");
459524
__tlbie_lpid_va(va, lpid, ap, ric);
460-
fixup_tlbie_lpid(lpid);
525+
fixup_tlbie_lpid_va(va, lpid, ap);
461526
asm volatile("eieio; tlbsync; ptesync": : :"memory");
462527
}
463528

@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
469534
if (also_pwc)
470535
__tlbie_pid(pid, RIC_FLUSH_PWC);
471536
__tlbie_va_range(start, end, pid, page_size, psize);
472-
fixup_tlbie();
473537
asm volatile("eieio; tlbsync; ptesync": : :"memory");
474538
}
475539

@@ -856,7 +920,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
856920
if (gflush)
857921
__tlbie_va_range(gstart, gend, pid,
858922
PUD_SIZE, MMU_PAGE_1G);
859-
fixup_tlbie();
923+
860924
asm volatile("eieio; tlbsync; ptesync": : :"memory");
861925
} else {
862926
_tlbiel_va_range_multicast(mm,

0 commit comments

Comments
 (0)