Commit a92bce9

paulusmack authored and avikivity committed
KVM: PPC: Book3S HV: Keep HPTE locked when invalidating
This reworks the implementations of the H_REMOVE and H_BULK_REMOVE
hcalls to make sure that we keep the HPTE locked and in the
reverse-mapping chain until we have finished invalidating it.
Previously we would remove it from the chain and unlock it before
invalidating it, leaving a tiny window when the guest could access
the page even though we believe we have removed it from the guest
(e.g., kvm_unmap_hva() has been called for the page and has found no
HPTEs in the chain).  In addition, we'll need this for future patches
where we will need to read the R and C bits in the HPTE after
invalidating it.

Doing this required restructuring kvmppc_h_bulk_remove()
substantially.  Since we want to batch up the tlbies, we now need to
keep several HPTEs locked simultaneously.  In order to avoid possible
deadlocks, we don't spin on the HPTE bitlock for any except the first
HPTE in a batch.  If we can't acquire the HPTE bitlock for the second
or subsequent HPTE, we terminate the batch at that point, do the
tlbies that we have accumulated so far, unlock those HPTEs, and then
start a new batch to do the remaining invalidations.

Signed-off-by: Paul Mackerras <[email protected]>
Signed-off-by: Alexander Graf <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
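The deadlock-avoidance scheme described above (spin only on the first
lock in a batch; if a later try-lock fails, flush what has been
accumulated so far, release those locks, and start a new batch) can be
illustrated by the minimal standalone C sketch below.  This is not the
patch itself: entry_t, try_lock_entry(), unlock_entry(), flush_batch()
and bulk_remove() are hypothetical stand-ins for the HPTE bitlock and
the batched-tlbie machinery, and it assumes C11 atomics.

/*
 * Sketch of the batching pattern: lock up to 4 entries, spinning
 * only when we hold no other locks, then flush the batch.
 */
#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
	atomic_flag lock;
	unsigned long val;
} entry_t;

static bool try_lock_entry(entry_t *e)
{
	/* true if we took the lock */
	return !atomic_flag_test_and_set_explicit(&e->lock,
						  memory_order_acquire);
}

static void unlock_entry(entry_t *e)
{
	atomic_flag_clear_explicit(&e->lock, memory_order_release);
}

/* stand-in for "do the accumulated tlbies, then unlock the batch" */
static void flush_batch(entry_t **batch, int n)
{
	for (int k = 0; k < n; ++k)
		unlock_entry(batch[k]);
}

void bulk_remove(entry_t **reqs, int nreqs)
{
	entry_t *batch[4];

	for (int i = 0; i < nreqs; ) {
		int n = 0;

		for (; i < nreqs && n < 4; ++i) {
			entry_t *e = reqs[i];

			/* to avoid deadlock, don't spin except for first */
			if (!try_lock_entry(e)) {
				if (n)
					break;	/* flush, then retry entry i */
				while (!try_lock_entry(e))
					;	/* cpu_relax() in the kernel */
			}
			batch[n++] = e;
		}
		if (!n)
			break;
		flush_batch(batch, n);	/* invalidate and unlock the batch */
	}
}

Because the spin only ever happens when n == 0, a thread never busy-waits
while holding another entry's lock, which is what rules out the
lock-ordering deadlock between concurrent bulk removes.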
1 parent b543403 commit a92bce9

File tree

1 file changed: +125, -87 lines changed

arch/powerpc/kvm/book3s_hv_rm_mmu.c

Lines changed: 125 additions & 87 deletions
@@ -140,6 +140,12 @@ static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
 	return kvmppc_read_update_linux_pte(ptep, writing);
 }
 
+static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+{
+	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+	hpte[0] = hpte_v;
+}
+
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		    long pte_index, unsigned long pteh, unsigned long ptel)
 {
@@ -356,6 +362,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *hpte;
 	unsigned long v, r, rb;
+	struct revmap_entry *rev;
 
 	if (pte_index >= HPT_NPTE)
 		return H_PARAMETER;
@@ -368,113 +375,146 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 		hpte[0] &= ~HPTE_V_HVLOCK;
 		return H_NOT_FOUND;
 	}
-	if (atomic_read(&kvm->online_vcpus) == 1)
-		flags |= H_LOCAL;
-	vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
-	vcpu->arch.gpr[5] = r = hpte[1];
-	rb = compute_tlbie_rb(v, r, pte_index);
-	if (v & HPTE_V_VALID)
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	v = hpte[0] & ~HPTE_V_HVLOCK;
+	if (v & HPTE_V_VALID) {
+		hpte[0] &= ~HPTE_V_VALID;
+		rb = compute_tlbie_rb(v, hpte[1], pte_index);
+		if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+			while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+				cpu_relax();
+			asm volatile("ptesync" : : : "memory");
+			asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+				     : : "r" (rb), "r" (kvm->arch.lpid));
+			asm volatile("ptesync" : : : "memory");
+			kvm->arch.tlbie_lock = 0;
+		} else {
+			asm volatile("ptesync" : : : "memory");
+			asm volatile("tlbiel %0" : : "r" (rb));
+			asm volatile("ptesync" : : : "memory");
+		}
 		remove_revmap_chain(kvm, pte_index, v);
-	smp_wmb();
-	hpte[0] = 0;
-	if (!(v & HPTE_V_VALID))
-		return H_SUCCESS;
-	if (!(flags & H_LOCAL)) {
-		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-			cpu_relax();
-		asm volatile("ptesync" : : : "memory");
-		asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-			     : : "r" (rb), "r" (kvm->arch.lpid));
-		asm volatile("ptesync" : : : "memory");
-		kvm->arch.tlbie_lock = 0;
-	} else {
-		asm volatile("ptesync" : : : "memory");
-		asm volatile("tlbiel %0" : : "r" (rb));
-		asm volatile("ptesync" : : : "memory");
 	}
+	r = rev->guest_rpte;
+	unlock_hpte(hpte, 0);
+
+	vcpu->arch.gpr[4] = v;
+	vcpu->arch.gpr[5] = r;
 	return H_SUCCESS;
 }
 
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *args = &vcpu->arch.gpr[4];
-	unsigned long *hp, tlbrb[4];
-	long int i, found;
-	long int n_inval = 0;
-	unsigned long flags, req, pte_index;
+	unsigned long *hp, *hptes[4], tlbrb[4];
+	long int i, j, k, n, found, indexes[4];
+	unsigned long flags, req, pte_index, rcbits;
 	long int local = 0;
 	long int ret = H_SUCCESS;
+	struct revmap_entry *rev, *revs[4];
 
 	if (atomic_read(&kvm->online_vcpus) == 1)
 		local = 1;
-	for (i = 0; i < 4; ++i) {
-		pte_index = args[i * 2];
-		flags = pte_index >> 56;
-		pte_index &= ((1ul << 56) - 1);
-		req = flags >> 6;
-		flags &= 3;
-		if (req == 3)
-			break;
-		if (req != 1 || flags == 3 ||
-		    pte_index >= HPT_NPTE) {
-			/* parameter error */
-			args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
-			ret = H_PARAMETER;
-			break;
-		}
-		hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
-		while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
-			cpu_relax();
-		found = 0;
-		if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
-			switch (flags & 3) {
-			case 0: /* absolute */
-				found = 1;
+	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+		n = 0;
+		for (; i < 4; ++i) {
+			j = i * 2;
+			pte_index = args[j];
+			flags = pte_index >> 56;
+			pte_index &= ((1ul << 56) - 1);
+			req = flags >> 6;
+			flags &= 3;
+			if (req == 3) {		/* no more requests */
+				i = 4;
 				break;
-			case 1: /* andcond */
-				if (!(hp[0] & args[i * 2 + 1]))
-					found = 1;
+			}
+			if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+				/* parameter error */
+				args[j] = ((0xa0 | flags) << 56) + pte_index;
+				ret = H_PARAMETER;
 				break;
-			case 2: /* AVPN */
-				if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
+			}
+			hp = (unsigned long *)
+				(kvm->arch.hpt_virt + (pte_index << 4));
+			/* to avoid deadlock, don't spin except for first */
+			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+				if (n)
+					break;
+				while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+					cpu_relax();
+			}
+			found = 0;
+			if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
				switch (flags & 3) {
+				case 0:		/* absolute */
 					found = 1;
-				break;
+					break;
+				case 1:		/* andcond */
+					if (!(hp[0] & args[j + 1]))
+						found = 1;
+					break;
+				case 2:		/* AVPN */
+					if ((hp[0] & ~0x7fUL) == args[j + 1])
+						found = 1;
+					break;
+				}
+			}
+			if (!found) {
+				hp[0] &= ~HPTE_V_HVLOCK;
+				args[j] = ((0x90 | flags) << 56) + pte_index;
+				continue;
 			}
+
+			args[j] = ((0x80 | flags) << 56) + pte_index;
+			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+			/* insert R and C bits from guest PTE */
+			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+			args[j] |= rcbits << (56 - 5);
+
+			if (!(hp[0] & HPTE_V_VALID))
+				continue;
+
+			hp[0] &= ~HPTE_V_VALID;		/* leave it locked */
+			tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+			indexes[n] = j;
+			hptes[n] = hp;
+			revs[n] = rev;
+			++n;
 		}
-		if (!found) {
-			hp[0] &= ~HPTE_V_HVLOCK;
-			args[i * 2] = ((0x90 | flags) << 56) + pte_index;
-			continue;
+
+		if (!n)
+			break;
+
+		/* Now that we've collected a batch, do the tlbies */
+		if (!local) {
+			while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+				cpu_relax();
+			asm volatile("ptesync" : : : "memory");
+			for (k = 0; k < n; ++k)
+				asm volatile(PPC_TLBIE(%1,%0) : :
+					     "r" (tlbrb[k]),
+					     "r" (kvm->arch.lpid));
+			asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+			kvm->arch.tlbie_lock = 0;
+		} else {
+			asm volatile("ptesync" : : : "memory");
+			for (k = 0; k < n; ++k)
+				asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
+			asm volatile("ptesync" : : : "memory");
 		}
-		/* insert R and C bits from PTE */
-		flags |= (hp[1] >> 5) & 0x0c;
-		args[i * 2] = ((0x80 | flags) << 56) + pte_index;
-		if (hp[0] & HPTE_V_VALID) {
-			tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+
+		for (k = 0; k < n; ++k) {
+			j = indexes[k];
+			pte_index = args[j] & ((1ul << 56) - 1);
+			hp = hptes[k];
+			rev = revs[k];
 			remove_revmap_chain(kvm, pte_index, hp[0]);
+			unlock_hpte(hp, 0);
 		}
-		smp_wmb();
-		hp[0] = 0;
-	}
-	if (n_inval == 0)
-		return ret;
-
-	if (!local) {
-		while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
-			cpu_relax();
-		asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < n_inval; ++i)
-			asm volatile(PPC_TLBIE(%1,%0)
-				     : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
-		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-		kvm->arch.tlbie_lock = 0;
-	} else {
-		asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < n_inval; ++i)
-			asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
-		asm volatile("ptesync" : : : "memory");
 	}
+
 	return ret;
 }
 
@@ -720,9 +760,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
 	gr = rev->guest_rpte;
 
-	/* Unlock the HPTE */
-	asm volatile("lwsync" : : : "memory");
-	hpte[0] = v;
+	unlock_hpte(hpte, v);
 
 	/* For not found, if the HPTE is valid by now, retry the instruction */
 	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
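A note on the return-code encoding this patch touches: the old code
folded the R and C bits of the hardware PTE into the flags byte with
(hp[1] >> 5) & 0x0c, while the new code masks them out of
rev->guest_rpte and shifts by (56 - 5).  Both place R in bit 59 and C
in bit 58 of the returned argument word; only the source of the bits
changes (the guest view of the PTE instead of the hardware PTE).  The
small standalone check below demonstrates the bit-position
equivalence; it assumes a 64-bit unsigned long and uses the HPTE_R_R
and HPTE_R_C values from the kernel's hash-MMU headers.

#include <assert.h>

#define HPTE_R_R	0x0000000000000100UL	/* referenced */
#define HPTE_R_C	0x0000000000000080UL	/* changed */

int main(void)
{
	unsigned long rpte = HPTE_R_R | HPTE_R_C;	/* both bits set */

	/* old: fold R/C into the flags byte, then shift into bits 63..56 */
	unsigned long old_enc = ((rpte >> 5) & 0x0c) << 56;

	/* new: mask R/C out of the guest PTE and shift directly */
	unsigned long new_enc = (rpte & (HPTE_R_R | HPTE_R_C)) << (56 - 5);

	assert(old_enc == new_enc);	/* R -> bit 59, C -> bit 58 */
	return 0;
}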