Skip to content

Commit 080012b

Browse files
committed
Merge tag 'powerpc-4.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Michael Ellerman:
 "The highlight is Ben's patch to work around a host killing bug when
  running KVM guests with the Radix MMU on Power9. See the long change
  log of that commit for more detail.

  And then three fairly minor fixes:

   - fix of_node_put() underflow during reconfig remove, using old
     DLPAR tools.

   - fix recently introduced ld version check with 64-bit LE-only
     toolchain.

   - free the subpage_prot_table correctly, avoiding a memory leak.

  Thanks to: Aneesh Kumar K.V, Benjamin Herrenschmidt, Laurent Vivier"

* tag 'powerpc-4.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/mm/hash: Free the subpage_prot_table correctly
  powerpc/Makefile: Fix ld version check with 64-bit LE-only toolchain
  powerpc/pseries: Fix of_node_put() underflow during reconfig remove
  powerpc/mm/radix: Workaround prefetch issue with KVM
2 parents e26f1be + 0da12a7 commit 080012b

File tree

9 files changed

+168
-36
lines changed

9 files changed

+168
-36
lines changed

arch/powerpc/Makefile

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,19 @@ machine-$(CONFIG_PPC64) += 64
5959
machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
6060
UTS_MACHINE := $(subst $(space),,$(machine-y))
6161

62+
# XXX This needs to be before we override LD below
63+
ifdef CONFIG_PPC32
64+
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
65+
else
66+
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
67+
# Have the linker provide sfpr if possible.
68+
# There is a corresponding test in arch/powerpc/lib/Makefile
69+
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
70+
else
71+
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
72+
endif
73+
endif
74+
6275
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
6376
override LD += -EL
6477
LDEMULATION := lppc
@@ -190,18 +203,6 @@ else
190203
CHECKFLAGS += -D__LITTLE_ENDIAN__
191204
endif
192205

193-
ifdef CONFIG_PPC32
194-
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
195-
else
196-
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
197-
# Have the linker provide sfpr if possible.
198-
# There is a corresponding test in arch/powerpc/lib/Makefile
199-
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
200-
else
201-
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
202-
endif
203-
endif
204-
205206
ifeq ($(CONFIG_476FPE_ERR46),y)
206207
KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
207208
-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds

arch/powerpc/include/asm/book3s/64/mmu.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb;
5959
#define PRTS_MASK 0x1f /* process table size field */
6060
#define PRTB_MASK 0x0ffffffffffff000UL
6161

62-
/*
63-
* Limit process table to PAGE_SIZE table. This
64-
* also limit the max pid we can support.
65-
* MAX_USER_CONTEXT * 16 bytes of space.
66-
*/
67-
#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
68-
#define PRTB_ENTRIES (1ul << CONTEXT_BITS)
62+
/* Number of supported PID bits */
63+
extern unsigned int mmu_pid_bits;
64+
65+
/* Base PID to allocate from */
66+
extern unsigned int mmu_base_pid;
67+
68+
#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
69+
#define PRTB_ENTRIES (1ul << mmu_pid_bits)
6970

7071
/*
7172
* Power9 currently only support 64K partition table size.

arch/powerpc/include/asm/mmu_context.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd);
4545

4646
#ifdef CONFIG_PPC_BOOK3S_64
4747
extern void radix__switch_mmu_context(struct mm_struct *prev,
48-
struct mm_struct *next);
48+
struct mm_struct *next);
4949
static inline void switch_mmu_context(struct mm_struct *prev,
5050
struct mm_struct *next,
5151
struct task_struct *tsk)
@@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id);
6767
extern void mmu_context_init(void);
6868
#endif
6969

70+
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
71+
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
72+
#else
73+
static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
74+
#endif
75+
7076
extern void switch_cop(struct mm_struct *next);
7177
extern int use_cop(unsigned long acop, struct mm_struct *mm);
7278
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@@ -79,9 +85,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
7985
struct mm_struct *next,
8086
struct task_struct *tsk)
8187
{
88+
bool new_on_cpu = false;
89+
8290
/* Mark this context has been used on the new CPU */
83-
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next)))
91+
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
8492
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
93+
new_on_cpu = true;
94+
}
8595

8696
/* 32-bit keeps track of the current PGDIR in the thread struct */
8797
#ifdef CONFIG_PPC32
@@ -109,6 +119,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
109119
if (cpu_has_feature(CPU_FTR_ALTIVEC))
110120
asm volatile ("dssall");
111121
#endif /* CONFIG_ALTIVEC */
122+
123+
if (new_on_cpu)
124+
radix_kvm_prefetch_workaround(next);
125+
112126
/*
113127
* The actual HW switching method differs between the various
114128
* sub architectures. Out of line for now

arch/powerpc/kvm/book3s_hv_rmhandlers.S

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,12 +1443,14 @@ mc_cont:
14431443
ori r6,r6,1
14441444
mtspr SPRN_CTRLT,r6
14451445
4:
1446-
/* Read the guest SLB and save it away */
1446+
/* Check if we are running hash or radix and store it in cr2 */
14471447
ld r5, VCPU_KVM(r9)
14481448
lbz r0, KVM_RADIX(r5)
1449-
cmpwi r0, 0
1449+
cmpwi cr2,r0,0
1450+
1451+
/* Read the guest SLB and save it away */
14501452
li r5, 0
1451-
bne 3f /* for radix, save 0 entries */
1453+
bne cr2, 3f /* for radix, save 0 entries */
14521454
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
14531455
mtctr r0
14541456
li r6,0
@@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96)
17121714
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
17131715
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
17141716
22:
1715-
/* Clear out SLB */
1716-
li r5,0
1717-
slbmte r5,r5
1718-
slbia
1719-
ptesync
17201717

17211718
/* Restore host values of some registers */
17221719
BEGIN_FTR_SECTION
@@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION
17371734
mtspr SPRN_PID, r7
17381735
mtspr SPRN_IAMR, r8
17391736
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1737+
1738+
#ifdef CONFIG_PPC_RADIX_MMU
1739+
/*
1740+
* Are we running hash or radix ?
1741+
*/
1742+
beq cr2,3f
1743+
1744+
/* Radix: Handle the case where the guest used an illegal PID */
1745+
LOAD_REG_ADDR(r4, mmu_base_pid)
1746+
lwz r3, VCPU_GUEST_PID(r9)
1747+
lwz r5, 0(r4)
1748+
cmpw cr0,r3,r5
1749+
blt 2f
1750+
1751+
/*
1752+
* Illegal PID, the HW might have prefetched and cached in the TLB
1753+
* some translations for the LPID 0 / guest PID combination which
1754+
* Linux doesn't know about, so we need to flush that PID out of
1755+
* the TLB. First we need to set LPIDR to 0 so tlbiel applies to
1756+
* the right context.
1757+
*/
1758+
li r0,0
1759+
mtspr SPRN_LPID,r0
1760+
isync
1761+
1762+
/* Then do a congruence class local flush */
1763+
ld r6,VCPU_KVM(r9)
1764+
lwz r0,KVM_TLB_SETS(r6)
1765+
mtctr r0
1766+
li r7,0x400 /* IS field = 0b01 */
1767+
ptesync
1768+
sldi r0,r3,32 /* RS has PID */
1769+
1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
1770+
addi r7,r7,0x1000
1771+
bdnz 1b
1772+
ptesync
1773+
1774+
2: /* Flush the ERAT on radix P9 DD1 guest exit */
17401775
BEGIN_FTR_SECTION
17411776
PPC_INVALIDATE_ERAT
17421777
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
1778+
b 4f
1779+
#endif /* CONFIG_PPC_RADIX_MMU */
17431780

1781+
/* Hash: clear out SLB */
1782+
3: li r5,0
1783+
slbmte r5,r5
1784+
slbia
1785+
ptesync
1786+
4:
17441787
/*
17451788
* POWER7/POWER8 guest -> host partition switch code.
17461789
* We don't have to lock against tlbies but we do

arch/powerpc/mm/mmu_context_book3s64.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,10 @@ static int hash__init_new_context(struct mm_struct *mm)
126126
static int radix__init_new_context(struct mm_struct *mm)
127127
{
128128
unsigned long rts_field;
129-
int index;
129+
int index, max_id;
130130

131-
index = alloc_context_id(1, PRTB_ENTRIES - 1);
131+
max_id = (1 << mmu_pid_bits) - 1;
132+
index = alloc_context_id(mmu_base_pid, max_id);
132133
if (index < 0)
133134
return index;
134135

arch/powerpc/mm/pgtable-radix.c

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525

2626
#include <trace/events/thp.h>
2727

28+
unsigned int mmu_pid_bits;
29+
unsigned int mmu_base_pid;
30+
2831
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
2932
unsigned long table_size)
3033
{
@@ -261,11 +264,34 @@ static void __init radix_init_pgtable(void)
261264
for_each_memblock(memory, reg)
262265
WARN_ON(create_physical_mapping(reg->base,
263266
reg->base + reg->size));
267+
268+
/* Find out how many PID bits are supported */
269+
if (cpu_has_feature(CPU_FTR_HVMODE)) {
270+
if (!mmu_pid_bits)
271+
mmu_pid_bits = 20;
272+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
273+
/*
274+
* When KVM is possible, we only use the top half of the
275+
* PID space to avoid collisions between host and guest PIDs
276+
* which can cause problems due to prefetch when exiting the
277+
* guest with AIL=3
278+
*/
279+
mmu_base_pid = 1 << (mmu_pid_bits - 1);
280+
#else
281+
mmu_base_pid = 1;
282+
#endif
283+
} else {
284+
/* The guest uses the bottom half of the PID space */
285+
if (!mmu_pid_bits)
286+
mmu_pid_bits = 19;
287+
mmu_base_pid = 1;
288+
}
289+
264290
/*
265291
* Allocate Partition table and process table for the
266292
* host.
267293
*/
268-
BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
294+
BUG_ON(PRTB_SIZE_SHIFT > 36);
269295
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
270296
/*
271297
* Fill in the process table.
@@ -339,6 +365,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
339365
if (type == NULL || strcmp(type, "cpu") != 0)
340366
return 0;
341367

368+
/* Find MMU PID size */
369+
prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
370+
if (prop && size == 4)
371+
mmu_pid_bits = be32_to_cpup(prop);
372+
373+
/* Grab page size encodings */
342374
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
343375
if (!prop)
344376
return 0;

arch/powerpc/mm/subpage-prot.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ void subpage_prot_free(struct mm_struct *mm)
3636
}
3737
}
3838
addr = 0;
39-
for (i = 0; i < 2; ++i) {
39+
for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
4040
p = spt->protptrs[i];
4141
if (!p)
4242
continue;

arch/powerpc/mm/tlb-radix.c

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
#include <linux/mm.h>
1313
#include <linux/hugetlb.h>
1414
#include <linux/memblock.h>
15-
#include <asm/ppc-opcode.h>
1615

16+
#include <asm/ppc-opcode.h>
1717
#include <asm/tlb.h>
1818
#include <asm/tlbflush.h>
1919
#include <asm/trace.h>
20-
20+
#include <asm/cputhreads.h>
2121

2222
#define RIC_FLUSH_TLB 0
2323
#define RIC_FLUSH_PWC 1
@@ -454,3 +454,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
454454
else
455455
radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
456456
}
457+
458+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
459+
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
460+
{
461+
unsigned int pid = mm->context.id;
462+
463+
if (unlikely(pid == MMU_NO_CONTEXT))
464+
return;
465+
466+
/*
467+
* If this context hasn't run on that CPU before and KVM is
468+
* around, there's a slim chance that the guest on another
469+
* CPU just brought in obsolete translation into the TLB of
470+
* this CPU due to a bad prefetch using the guest PID on
471+
* the way into the hypervisor.
472+
*
473+
* We work around this here. If KVM is possible, we check if
474+
* any sibling thread is in KVM. If it is, the window may exist
475+
* and thus we flush that PID from the core.
476+
*
477+
* A potential future improvement would be to mark which PIDs
478+
* have never been used on the system and avoid it if the PID
479+
* is new and the process has no other cpumask bit set.
480+
*/
481+
if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
482+
int cpu = smp_processor_id();
483+
int sib = cpu_first_thread_sibling(cpu);
484+
bool flush = false;
485+
486+
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
487+
if (sib == cpu)
488+
continue;
489+
if (paca[sib].kvm_hstate.kvm_vcpu)
490+
flush = true;
491+
}
492+
if (flush)
493+
_tlbiel_pid(pid, RIC_FLUSH_ALL);
494+
}
495+
}
496+
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
497+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

arch/powerpc/platforms/pseries/reconfig.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
8282

8383
of_detach_node(np);
8484
of_node_put(parent);
85-
of_node_put(np); /* Must decrement the refcount */
8685
return 0;
8786
}
8887

0 commit comments

Comments
 (0)