Skip to content

Commit 71b7bf1

Browse files
committed
Merge branch 'kvm-e500-check-writable-pfn' into HEAD
The new __kvm_faultin_pfn() function is upset by the fact that e500 KVM ignores host page permissions - __kvm_faultin requires a "writable" outgoing argument, but e500 KVM is passing NULL. While a simple fix that merely allows writable to be NULL would be possible, it is quite ugly to have e500 KVM completely ignore the host permissions and map read-only host pages as guest-writable. Merge a more complete fix and remove the VMA-based attempts at building huge shadow TLB entries. Using a PTE lookup, similar to what is done for x86, is better and works with remap_pfn_range() because it does not assume that VM_PFNMAP areas are contiguous. Note that the same incorrect logic is there in ARM's get_vma_page_shift() and RISC-V's kvm_riscv_gstage_ioremap(). Fortunately, for e500 most of the code is already there; it just has to be changed to compute the range from find_linux_pte()'s output rather than find_vma(). The new code works for both VM_PFNMAP and hugetlb mappings, so the latter is removed. Patches 2-5 were tested by the reporter, Christian Zigotzky. Since the difference with v1 is minimal, I am going to send it to Linus today.
2 parents f07044d + 55f4db7 commit 71b7bf1

File tree

2 files changed

+85
-116
lines changed

2 files changed

+85
-116
lines changed

arch/powerpc/kvm/e500.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ enum vcpu_ftr {
3434
#define E500_TLB_BITMAP (1 << 30)
3535
/* TLB1 entry is mapped by host TLB0 */
3636
#define E500_TLB_TLB0 (1 << 29)
37+
/* entry is writable on the host */
38+
#define E500_TLB_WRITABLE (1 << 28)
3739
/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */
3840
#define E500_TLB_MAS2_ATTR (0x7f)
3941

arch/powerpc/kvm/e500_mmu_host.c

Lines changed: 83 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,14 @@ static inline unsigned int tlb1_max_shadow_size(void)
4545
return host_tlb_params[1].entries - tlbcam_index - 1;
4646
}
4747

48-
static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
48+
static inline u32 e500_shadow_mas3_attrib(u32 mas3, bool writable, int usermode)
4949
{
5050
/* Mask off reserved bits. */
5151
mas3 &= MAS3_ATTRIB_MASK;
5252

53+
if (!writable)
54+
mas3 &= ~(MAS3_UW|MAS3_SW);
55+
5356
#ifndef CONFIG_KVM_BOOKE_HV
5457
if (!usermode) {
5558
/* Guest is in supervisor mode,
@@ -242,17 +245,18 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
242245
return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
243246
}
244247

245-
static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref,
248+
static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
246249
struct kvm_book3e_206_tlb_entry *gtlbe,
247-
kvm_pfn_t pfn, unsigned int wimg)
250+
kvm_pfn_t pfn, unsigned int wimg,
251+
bool writable)
248252
{
249253
ref->pfn = pfn;
250254
ref->flags = E500_TLB_VALID;
255+
if (writable)
256+
ref->flags |= E500_TLB_WRITABLE;
251257

252258
/* Use guest supplied MAS2_G and MAS2_E */
253259
ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
254-
255-
return tlbe_is_writable(gtlbe);
256260
}
257261

258262
static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
@@ -305,14 +309,15 @@ static void kvmppc_e500_setup_stlbe(
305309
{
306310
kvm_pfn_t pfn = ref->pfn;
307311
u32 pr = vcpu->arch.shared->msr & MSR_PR;
312+
bool writable = !!(ref->flags & E500_TLB_WRITABLE);
308313

309314
BUG_ON(!(ref->flags & E500_TLB_VALID));
310315

311316
/* Force IPROT=0 for all guest mappings. */
312317
stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
313318
stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
314319
stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
315-
e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
320+
e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr);
316321
}
317322

318323
static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -321,15 +326,14 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
321326
struct tlbe_ref *ref)
322327
{
323328
struct kvm_memory_slot *slot;
324-
unsigned long pfn = 0; /* silence GCC warning */
329+
unsigned int psize;
330+
unsigned long pfn;
325331
struct page *page = NULL;
326332
unsigned long hva;
327-
int pfnmap = 0;
328333
int tsize = BOOK3E_PAGESZ_4K;
329334
int ret = 0;
330335
unsigned long mmu_seq;
331336
struct kvm *kvm = vcpu_e500->vcpu.kvm;
332-
unsigned long tsize_pages = 0;
333337
pte_t *ptep;
334338
unsigned int wimg = 0;
335339
pgd_t *pgdir;
@@ -351,110 +355,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
351355
slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
352356
hva = gfn_to_hva_memslot(slot, gfn);
353357

354-
if (tlbsel == 1) {
355-
struct vm_area_struct *vma;
356-
mmap_read_lock(kvm->mm);
357-
358-
vma = find_vma(kvm->mm, hva);
359-
if (vma && hva >= vma->vm_start &&
360-
(vma->vm_flags & VM_PFNMAP)) {
361-
/*
362-
* This VMA is a physically contiguous region (e.g.
363-
* /dev/mem) that bypasses normal Linux page
364-
* management. Find the overlap between the
365-
* vma and the memslot.
366-
*/
367-
368-
unsigned long start, end;
369-
unsigned long slot_start, slot_end;
370-
371-
pfnmap = 1;
372-
373-
start = vma->vm_pgoff;
374-
end = start +
375-
vma_pages(vma);
376-
377-
pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
378-
379-
slot_start = pfn - (gfn - slot->base_gfn);
380-
slot_end = slot_start + slot->npages;
381-
382-
if (start < slot_start)
383-
start = slot_start;
384-
if (end > slot_end)
385-
end = slot_end;
386-
387-
tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
388-
MAS1_TSIZE_SHIFT;
389-
390-
/*
391-
* e500 doesn't implement the lowest tsize bit,
392-
* or 1K pages.
393-
*/
394-
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
395-
396-
/*
397-
* Now find the largest tsize (up to what the guest
398-
* requested) that will cover gfn, stay within the
399-
* range, and for which gfn and pfn are mutually
400-
* aligned.
401-
*/
402-
403-
for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
404-
unsigned long gfn_start, gfn_end;
405-
tsize_pages = 1UL << (tsize - 2);
406-
407-
gfn_start = gfn & ~(tsize_pages - 1);
408-
gfn_end = gfn_start + tsize_pages;
409-
410-
if (gfn_start + pfn - gfn < start)
411-
continue;
412-
if (gfn_end + pfn - gfn > end)
413-
continue;
414-
if ((gfn & (tsize_pages - 1)) !=
415-
(pfn & (tsize_pages - 1)))
416-
continue;
417-
418-
gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
419-
pfn &= ~(tsize_pages - 1);
420-
break;
421-
}
422-
} else if (vma && hva >= vma->vm_start &&
423-
is_vm_hugetlb_page(vma)) {
424-
unsigned long psize = vma_kernel_pagesize(vma);
425-
426-
tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
427-
MAS1_TSIZE_SHIFT;
428-
429-
/*
430-
* Take the largest page size that satisfies both host
431-
* and guest mapping
432-
*/
433-
tsize = min(__ilog2(psize) - 10, tsize);
434-
435-
/*
436-
* e500 doesn't implement the lowest tsize bit,
437-
* or 1K pages.
438-
*/
439-
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
440-
}
441-
442-
mmap_read_unlock(kvm->mm);
443-
}
444-
445-
if (likely(!pfnmap)) {
446-
tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
447-
pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page);
448-
if (is_error_noslot_pfn(pfn)) {
449-
if (printk_ratelimit())
450-
pr_err("%s: real page not found for gfn %lx\n",
451-
__func__, (long)gfn);
452-
return -EINVAL;
453-
}
454-
455-
/* Align guest and physical address to page map boundaries */
456-
pfn &= ~(tsize_pages - 1);
457-
gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
358+
pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page);
359+
if (is_error_noslot_pfn(pfn)) {
360+
if (printk_ratelimit())
361+
pr_err("%s: real page not found for gfn %lx\n",
362+
__func__, (long)gfn);
363+
return -EINVAL;
458364
}
459365

460366
spin_lock(&kvm->mmu_lock);
@@ -472,14 +378,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
472378
* can't run hence pfn won't change.
473379
*/
474380
local_irq_save(flags);
475-
ptep = find_linux_pte(pgdir, hva, NULL, NULL);
381+
ptep = find_linux_pte(pgdir, hva, NULL, &psize);
476382
if (ptep) {
477383
pte_t pte = READ_ONCE(*ptep);
478384

479385
if (pte_present(pte)) {
480386
wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
481387
MAS2_WIMGE_MASK;
482-
local_irq_restore(flags);
483388
} else {
484389
local_irq_restore(flags);
485390
pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
@@ -488,10 +393,72 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
488393
goto out;
489394
}
490395
}
491-
writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
396+
local_irq_restore(flags);
397+
398+
if (psize && tlbsel == 1) {
399+
unsigned long psize_pages, tsize_pages;
400+
unsigned long start, end;
401+
unsigned long slot_start, slot_end;
402+
403+
psize_pages = 1UL << (psize - PAGE_SHIFT);
404+
start = pfn & ~(psize_pages - 1);
405+
end = start + psize_pages;
406+
407+
slot_start = pfn - (gfn - slot->base_gfn);
408+
slot_end = slot_start + slot->npages;
409+
410+
if (start < slot_start)
411+
start = slot_start;
412+
if (end > slot_end)
413+
end = slot_end;
414+
415+
tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
416+
MAS1_TSIZE_SHIFT;
417+
418+
/*
419+
* Any page size that doesn't satisfy the host mapping
420+
* will fail the start and end tests.
421+
*/
422+
tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize);
423+
424+
/*
425+
* e500 doesn't implement the lowest tsize bit,
426+
* or 1K pages.
427+
*/
428+
tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
429+
430+
/*
431+
* Now find the largest tsize (up to what the guest
432+
* requested) that will cover gfn, stay within the
433+
* range, and for which gfn and pfn are mutually
434+
* aligned.
435+
*/
436+
437+
for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
438+
unsigned long gfn_start, gfn_end;
439+
tsize_pages = 1UL << (tsize - 2);
440+
441+
gfn_start = gfn & ~(tsize_pages - 1);
442+
gfn_end = gfn_start + tsize_pages;
443+
444+
if (gfn_start + pfn - gfn < start)
445+
continue;
446+
if (gfn_end + pfn - gfn > end)
447+
continue;
448+
if ((gfn & (tsize_pages - 1)) !=
449+
(pfn & (tsize_pages - 1)))
450+
continue;
451+
452+
gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
453+
pfn &= ~(tsize_pages - 1);
454+
break;
455+
}
456+
}
492457

458+
kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable);
493459
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
494460
ref, gvaddr, stlbe);
461+
writable = tlbe_is_writable(stlbe);
495462

496463
/* Clear i-cache for new pages */
497464
kvmppc_mmu_flush_icache(pfn);

0 commit comments

Comments
 (0)