
Commit 4cf302b

paulusmack authored and avikivity committed
KVM: PPC: Allow for read-only pages backing a Book3S HV guest
With this, if a guest does an H_ENTER with a read/write HPTE on a page which is currently read-only, we make the actual HPTE inserted be a read-only version of the HPTE. We now intercept protection faults as well as HPTE not found faults, and for a protection fault we work out whether it should be reflected to the guest (e.g. because the guest HPTE didn't allow write access to usermode) or handled by switching to kernel context and calling kvmppc_book3s_hv_page_fault, which will then request write access to the page and update the actual HPTE.

Signed-off-by: Paul Mackerras <[email protected]>
Signed-off-by: Alexander Graf <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
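To make the permission downgrade concrete, here is a small stand-alone sketch that exercises the hpte_is_writable() and hpte_make_readonly() helpers exactly as they are added in the kvm_book3s_64.h hunk below. The PP_* and HPTE_R_PP* constant values are reproduced from memory of the powerpc headers purely for illustration; they are not part of this commit.

/* Stand-alone sketch: a user read/write HPTE request gets downgraded to
 * read-only when the backing page does not allow writing.  The two helpers
 * are copied from the kvm_book3s_64.h hunk in this commit; the constants
 * below mirror (from memory) the powerpc header definitions and are
 * illustrative only.
 */
#include <stdio.h>

#define HPTE_R_PP0	0x8000000000000000UL	/* high PP bit */
#define HPTE_R_PP	0x0000000000000003UL	/* low PP bits */
#define PP_RWXX		0UL			/* supervisor RW, user no access */
#define PP_RWRW		2UL			/* supervisor RW, user RW */
#define PP_RXRX		3UL			/* supervisor RO, user RO */
#define PP_RXXX		(HPTE_R_PP0 | 2UL)	/* supervisor RO, user no access */

static inline int hpte_is_writable(unsigned long ptel)
{
	unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);

	return pp != PP_RXRX && pp != PP_RXXX;
}

static inline unsigned long hpte_make_readonly(unsigned long ptel)
{
	if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
		ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
	else
		ptel |= PP_RXRX;
	return ptel;
}

int main(void)
{
	/* The guest asks for a user read/write mapping in its H_ENTER ptel... */
	unsigned long ptel = PP_RWRW;

	/* ...but the Linux PTE for the backing page is not writable, so the
	 * hypervisor inserts a read-only version instead (cf. kvmppc_h_enter). */
	if (hpte_is_writable(ptel))
		ptel = hpte_make_readonly(ptel);

	printf("pp field now %#lx, hpte_is_writable() = %d\n",
	       ptel & (HPTE_R_PP0 | HPTE_R_PP), hpte_is_writable(ptel));
	return 0;
}

A later protection fault on such a downgraded entry is what the new code paths below sort out: kvmppc_hpte_hv_fault() decides in real mode whether to reflect the fault to the guest, and kvmppc_book3s_hv_page_fault() upgrades the mapping to read/write when the host allows it.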
1 parent a355aa5 commit 4cf302b

4 files changed: +78 −17 lines


arch/powerpc/include/asm/kvm_book3s_64.h

Lines changed: 18 additions & 2 deletions
@@ -121,6 +121,22 @@ static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
 	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
 }
 
+static inline int hpte_is_writable(unsigned long ptel)
+{
+	unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);
+
+	return pp != PP_RXRX && pp != PP_RXXX;
+}
+
+static inline unsigned long hpte_make_readonly(unsigned long ptel)
+{
+	if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
+		ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
+	else
+		ptel |= PP_RXRX;
+	return ptel;
+}
+
 static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 {
 	unsigned int wimg = ptel & HPTE_R_WIMG;
@@ -140,7 +156,7 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
  * Lock and read a linux PTE.  If it's present and writable, atomically
  * set dirty and referenced bits and return the PTE, otherwise return 0.
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
 {
 	pte_t pte, tmp;
 
@@ -158,7 +174,7 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
 
 	if (pte_present(pte)) {
 		pte = pte_mkyoung(pte);
-		if (pte_write(pte))
+		if (writing && pte_write(pte))
 			pte = pte_mkdirty(pte);
 	}
 

arch/powerpc/kvm/book3s_64_mmu_hv.c

Lines changed: 36 additions & 3 deletions
@@ -503,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	struct page *page, *pages[1];
 	long index, ret, npages;
 	unsigned long is_io;
+	unsigned int writing, write_ok;
 	struct vm_area_struct *vma;
 
 	/*
@@ -553,8 +554,11 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	pfn = 0;
 	page = NULL;
 	pte_size = PAGE_SIZE;
+	writing = (dsisr & DSISR_ISSTORE) != 0;
+	/* If writing != 0, then the HPTE must allow writing, if we get here */
+	write_ok = writing;
 	hva = gfn_to_hva_memslot(memslot, gfn);
-	npages = get_user_pages_fast(hva, 1, 1, pages);
+	npages = get_user_pages_fast(hva, 1, writing, pages);
 	if (npages < 1) {
 		/* Check if it's an I/O mapping */
 		down_read(&current->mm->mmap_sem);
@@ -565,6 +569,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				((hva - vma->vm_start) >> PAGE_SHIFT);
 			pte_size = psize;
 			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+			write_ok = vma->vm_flags & VM_WRITE;
 		}
 		up_read(&current->mm->mmap_sem);
 		if (!pfn)
@@ -575,6 +580,24 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			page = compound_head(page);
 			pte_size <<= compound_order(page);
 		}
+		/* if the guest wants write access, see if that is OK */
+		if (!writing && hpte_is_writable(r)) {
+			pte_t *ptep, pte;
+
+			/*
+			 * We need to protect against page table destruction
+			 * while looking up and updating the pte.
+			 */
+			rcu_read_lock_sched();
+			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+							 hva, NULL);
+			if (ptep && pte_present(*ptep)) {
+				pte = kvmppc_read_update_linux_pte(ptep, 1);
+				if (pte_write(pte))
+					write_ok = 1;
+			}
+			rcu_read_unlock_sched();
+		}
 		pfn = page_to_pfn(page);
 	}
 
@@ -595,6 +618,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* Set the HPTE to point to pfn */
 	r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+	if (hpte_is_writable(r) && !write_ok)
+		r = hpte_make_readonly(r);
 	ret = RESUME_GUEST;
 	preempt_disable();
 	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
@@ -614,14 +639,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		unlock_rmap(rmap);
 		goto out_unlock;
 	}
-	kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+
+	if (hptep[0] & HPTE_V_VALID) {
+		/* HPTE was previously valid, so we need to invalidate it */
+		unlock_rmap(rmap);
+		hptep[0] |= HPTE_V_ABSENT;
+		kvmppc_invalidate_hpte(kvm, hptep, index);
+	} else {
+		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+	}
 
 	hptep[1] = r;
 	eieio();
 	hptep[0] = hpte[0];
 	asm volatile("ptesync" : : : "memory");
 	preempt_enable();
-	if (page)
+	if (page && hpte_is_writable(r))
 		SetPageDirty(page);
 
  out_put:

arch/powerpc/kvm/book3s_hv_rm_mmu.c

Lines changed: 22 additions & 10 deletions
@@ -120,7 +120,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 }
 
 static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
-			      unsigned long *pte_sizep)
+			      int writing, unsigned long *pte_sizep)
 {
 	pte_t *ptep;
 	unsigned long ps = *pte_sizep;
@@ -137,7 +137,7 @@ static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
 		return __pte(0);
 	if (!pte_present(*ptep))
 		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep);
+	return kvmppc_read_update_linux_pte(ptep, writing);
 }
 
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
@@ -154,12 +154,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned long is_io;
 	unsigned long *rmap;
 	pte_t pte;
+	unsigned int writing;
 	unsigned long mmu_seq;
 	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
 		return H_PARAMETER;
+	writing = hpte_is_writable(ptel);
 	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
 
 	/* used later to detect if we might have been invalidated */
@@ -208,8 +210,11 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(vcpu, hva, &pte_size);
+		pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
 		if (pte_present(pte)) {
+			if (writing && !pte_write(pte))
+				/* make the actual HPTE be read-only */
+				ptel = hpte_make_readonly(ptel);
 			is_io = hpte_cache_bits(pte_val(pte));
 			pa = pte_pfn(pte) << PAGE_SHIFT;
 		}
@@ -678,7 +683,9 @@ EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
 
 /*
  * Called in real mode to check whether an HPTE not found fault
- * is due to accessing a paged-out page or an emulated MMIO page.
+ * is due to accessing a paged-out page or an emulated MMIO page,
+ * or if a protection fault is due to accessing a page that the
+ * guest wanted read/write access to but which we made read-only.
  * Returns a possibly modified status (DSISR) value if not
  * (i.e. pass the interrupt to the guest),
 * -1 to pass the fault up to host kernel mode code, -2 to do that
@@ -696,12 +703,17 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	struct revmap_entry *rev;
 	unsigned long pp, key;
 
-	valid = HPTE_V_VALID | HPTE_V_ABSENT;
+	/* For protection fault, expect to find a valid HPTE */
+	valid = HPTE_V_VALID;
+	if (status & DSISR_NOHPTE)
+		valid |= HPTE_V_ABSENT;
 
 	index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
-	if (index < 0)
-		return status;	/* there really was no HPTE */
-
+	if (index < 0) {
+		if (status & DSISR_NOHPTE)
+			return status;	/* there really was no HPTE */
+		return 0;		/* for prot fault, HPTE disappeared */
+	}
 	hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
 	v = hpte[0] & ~HPTE_V_HVLOCK;
 	r = hpte[1];
@@ -712,8 +724,8 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	asm volatile("lwsync" : : : "memory");
 	hpte[0] = v;
 
-	/* If the HPTE is valid by now, retry the instruction */
-	if (v & HPTE_V_VALID)
+	/* For not found, if the HPTE is valid by now, retry the instruction */
+	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
 		return 0;
 
 	/* Check access permissions to the page */

arch/powerpc/kvm/book3s_hv_rmhandlers.S

Lines changed: 2 additions & 2 deletions
@@ -1114,8 +1114,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 kvmppc_hdsi:
 	mfspr	r4, SPRN_HDAR
 	mfspr	r6, SPRN_HDSISR
-	/* HPTE not found fault? */
-	andis.	r0, r6, DSISR_NOHPTE@h
+	/* HPTE not found fault or protection fault? */
+	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
 	beq	1f			/* if not, send it to the guest */
 	andi.	r0, r11, MSR_DR		/* data relocation enabled? */
 	beq	3f
