Skip to content

Commit 5138dd0

Browse files
paulusmack
authored and gregkh committed
KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler
commit c3856ae upstream. This fixes several bugs in the radix page fault handler relating to the way large pages in the memory backing the guest were handled. First, the check for large pages only checked for explicit huge pages and missed transparent huge pages. Then the check that the addresses (host virtual vs. guest physical) had appropriate alignment was wrong, meaning that the code never put a large page in the partition scoped radix tree; it was always demoted to a small page. Fixing this exposed bugs in kvmppc_create_pte(). We were never invalidating a 2MB PTE, which meant that if a page was initially faulted in without write permission and the guest then attempted to store to it, we would never update the PTE to have write permission. If we find a valid 2MB PTE in the PMD, we need to clear it and do a TLB invalidation before installing either the new 2MB PTE or a pointer to a page table page. This also corrects an assumption that get_user_pages_fast would set the _PAGE_DIRTY bit if we are writing, which is not true. Instead we mark the page dirty explicitly with set_page_dirty_lock(). This also means we don't need the dirty bit set on the host PTE when providing write access on a read fault. [[email protected] - use mark_pages_dirty instead of kvmppc_update_dirty_map] Signed-off-by: Paul Mackerras <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 82e91e0 commit 5138dd0

File tree

1 file changed

+46
-26
lines changed

1 file changed

+46
-26
lines changed

arch/powerpc/kvm/book3s_64_mmu_radix.c

Lines changed: 46 additions & 26 deletions
Original file line number · Diff line number · Diff line change
@@ -19,6 +19,9 @@
1919
#include <asm/pgalloc.h>
2020
#include <asm/pte-walk.h>
2121

22+
static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
23+
unsigned long gfn, unsigned int order);
24+
2225
/*
2326
* Supported radix tree geometry.
2427
* Like p9, we support either 5 or 9 bits at the first (lowest) level,
@@ -195,6 +198,12 @@ static void kvmppc_pte_free(pte_t *ptep)
195198
kmem_cache_free(kvm_pte_cache, ptep);
196199
}
197200

201+
/* Like pmd_huge() and pmd_large(), but works regardless of config options */
202+
static inline int pmd_is_leaf(pmd_t pmd)
203+
{
204+
return !!(pmd_val(pmd) & _PAGE_PTE);
205+
}
206+
198207
static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
199208
unsigned int level, unsigned long mmu_seq)
200209
{
@@ -219,7 +228,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
219228
else
220229
new_pmd = pmd_alloc_one(kvm->mm, gpa);
221230

222-
if (level == 0 && !(pmd && pmd_present(*pmd)))
231+
if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
223232
new_ptep = kvmppc_pte_alloc();
224233

225234
/* Check if we might have been invalidated; let the guest retry if so */
@@ -244,12 +253,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
244253
new_pmd = NULL;
245254
}
246255
pmd = pmd_offset(pud, gpa);
247-
if (pmd_large(*pmd)) {
248-
/* Someone else has instantiated a large page here; retry */
249-
ret = -EAGAIN;
250-
goto out_unlock;
251-
}
252-
if (level == 1 && !pmd_none(*pmd)) {
256+
if (pmd_is_leaf(*pmd)) {
257+
unsigned long lgpa = gpa & PMD_MASK;
258+
259+
/*
260+
* If we raced with another CPU which has just put
261+
* a 2MB pte in after we saw a pte page, try again.
262+
*/
263+
if (level == 0 && !new_ptep) {
264+
ret = -EAGAIN;
265+
goto out_unlock;
266+
}
267+
/* Valid 2MB page here already, remove it */
268+
old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
269+
~0UL, 0, lgpa, PMD_SHIFT);
270+
kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
271+
if (old & _PAGE_DIRTY) {
272+
unsigned long gfn = lgpa >> PAGE_SHIFT;
273+
struct kvm_memory_slot *memslot;
274+
memslot = gfn_to_memslot(kvm, gfn);
275+
if (memslot)
276+
mark_pages_dirty(kvm, memslot, gfn,
277+
PMD_SHIFT - PAGE_SHIFT);
278+
}
279+
} else if (level == 1 && !pmd_none(*pmd)) {
253280
/*
254281
* There's a page table page here, but we wanted
255282
* to install a large page. Tell the caller and let
@@ -412,28 +439,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
412439
} else {
413440
page = pages[0];
414441
pfn = page_to_pfn(page);
415-
if (PageHuge(page)) {
416-
page = compound_head(page);
417-
pte_size <<= compound_order(page);
442+
if (PageCompound(page)) {
443+
pte_size <<= compound_order(compound_head(page));
418444
/* See if we can insert a 2MB large-page PTE here */
419445
if (pte_size >= PMD_SIZE &&
420-
(gpa & PMD_MASK & PAGE_MASK) ==
421-
(hva & PMD_MASK & PAGE_MASK)) {
446+
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
447+
(hva & (PMD_SIZE - PAGE_SIZE))) {
422448
level = 1;
423449
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
424450
}
425451
}
426452
/* See if we can provide write access */
427453
if (writing) {
428-
/*
429-
* We assume gup_fast has set dirty on the host PTE.
430-
*/
431454
pgflags |= _PAGE_WRITE;
432455
} else {
433456
local_irq_save(flags);
434457
ptep = find_current_mm_pte(current->mm->pgd,
435458
hva, NULL, NULL);
436-
if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
459+
if (ptep && pte_write(*ptep))
437460
pgflags |= _PAGE_WRITE;
438461
local_irq_restore(flags);
439462
}
@@ -459,18 +482,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
459482
pte = pfn_pte(pfn, __pgprot(pgflags));
460483
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
461484
}
462-
if (ret == 0 || ret == -EAGAIN)
463-
ret = RESUME_GUEST;
464485

465486
if (page) {
466-
/*
467-
* We drop pages[0] here, not page because page might
468-
* have been set to the head page of a compound, but
469-
* we have to drop the reference on the correct tail
470-
* page to match the get inside gup()
471-
*/
472-
put_page(pages[0]);
487+
if (!ret && (pgflags & _PAGE_WRITE))
488+
set_page_dirty_lock(page);
489+
put_page(page);
473490
}
491+
492+
if (ret == 0 || ret == -EAGAIN)
493+
ret = RESUME_GUEST;
474494
return ret;
475495
}
476496

@@ -676,7 +696,7 @@ void kvmppc_free_radix(struct kvm *kvm)
676696
continue;
677697
pmd = pmd_offset(pud, 0);
678698
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
679-
if (pmd_huge(*pmd)) {
699+
if (pmd_is_leaf(*pmd)) {
680700
pmd_clear(pmd);
681701
continue;
682702
}

0 commit comments

Comments
 (0)