Skip to content

Commit 5790069

Browse files
committed
KVM: PPC: Book3S HV: Virtualize doorbell facility on POWER9
On POWER9, we no longer have the restriction that we had on POWER8 where all threads in a core have to be in the same partition, so the CPU threads are now independent. However, we still want to be able to run guests with a virtual SMT topology, if only to allow migration of guests from POWER8 systems to POWER9. A guest that has a virtual SMT mode greater than 1 will expect to be able to use the doorbell facility; it will expect the msgsndp and msgclrp instructions to work appropriately and to be able to read sensible values from the TIR (thread identification register) and DPDES (directed privileged doorbell exception status) special-purpose registers. However, since each CPU thread is a separate sub-processor in POWER9, these instructions and registers can only be used within a single CPU thread. In order for these instructions to appear to act correctly according to the guest's virtual SMT mode, we have to trap and emulate them. We cause them to trap by clearing the HFSCR_MSGP bit in the HFSCR register. The emulation is triggered by the hypervisor facility unavailable interrupt that occurs when the guest uses them. To cause a doorbell interrupt to occur within the guest, we set the DPDES register to 1. If the guest has interrupts enabled, the CPU will generate a doorbell interrupt and clear the DPDES register in hardware. The DPDES hardware register for the guest is saved in the vcpu->arch.vcore->dpdes field. Since this gets written by the guest exit code, other VCPUs wishing to cause a doorbell interrupt don't write that field directly, but instead set a vcpu->arch.doorbell_request flag. This is consumed and set to 0 by the guest entry code, which then sets DPDES to 1. 
Emulating reads of the DPDES register is somewhat involved, because it requires reading the doorbell pending interrupt status of all of the VCPU threads in the virtual core, and if any of those VCPUs are running, their doorbell status is only up-to-date in the hardware DPDES registers of the CPUs where they are running. In order to get a reasonable approximation of the current doorbell status, we send those CPUs an IPI, causing an exit from the guest which will update the vcpu->arch.vcore->dpdes field. We then use that value in constructing the emulated DPDES register value. Signed-off-by: Paul Mackerras <[email protected]>
1 parent 3c31352 commit 5790069

File tree

6 files changed

+153
-11
lines changed

6 files changed

+153
-11
lines changed

arch/powerpc/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ struct kvm_resize_hpt;
268268
struct kvm_arch {
269269
unsigned int lpid;
270270
unsigned int smt_mode; /* # vcpus per virtual core */
271+
unsigned int emul_smt_mode; /* emulated SMT mode, on P9 */
271272
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
272273
unsigned int tlb_sets;
273274
struct kvm_hpt_info hpt;
@@ -712,6 +713,7 @@ struct kvm_vcpu_arch {
712713
unsigned long pending_exceptions;
713714
u8 ceded;
714715
u8 prodded;
716+
u8 doorbell_request;
715717
u32 last_inst;
716718

717719
struct swait_queue_head *wqp;

arch/powerpc/include/asm/ppc-opcode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@
103103
#define OP_31_XOP_STBUX 247
104104
#define OP_31_XOP_LHZX 279
105105
#define OP_31_XOP_LHZUX 311
106+
#define OP_31_XOP_MSGSNDP 142
107+
#define OP_31_XOP_MSGCLRP 174
106108
#define OP_31_XOP_MFSPR 339
107109
#define OP_31_XOP_LWAX 341
108110
#define OP_31_XOP_LHAX 343

arch/powerpc/kernel/asm-offsets.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,7 @@ int main(void)
513513
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
514514
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
515515
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
516+
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
516517
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
517518
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
518519
OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);

arch/powerpc/kvm/book3s_hv.c

Lines changed: 125 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
#include <linux/of.h>
4747

4848
#include <asm/reg.h>
49+
#include <asm/ppc-opcode.h>
50+
#include <asm/disassemble.h>
4951
#include <asm/cputable.h>
5052
#include <asm/cacheflush.h>
5153
#include <asm/tlbflush.h>
@@ -681,6 +683,15 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
681683
int thr;
682684
struct kvmppc_vcore *vc;
683685

686+
if (vcpu->arch.doorbell_request)
687+
return true;
688+
/*
689+
* Ensure that the read of vcore->dpdes comes after the read
690+
* of vcpu->doorbell_request. This barrier matches the
691+
* lwsync in book3s_hv_rmhandlers.S just before the
692+
* fast_guest_return label.
693+
*/
694+
smp_rmb();
684695
vc = vcpu->arch.vcore;
685696
thr = vcpu->vcpu_id - vc->first_vcpuid;
686697
return !!(vc->dpdes & (1 << thr));
@@ -937,6 +948,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
937948
}
938949
}
939950

951+
/*
 * Empty callback for smp_call_function_single().  It is used when the
 * only goal is to interrupt the target CPU; no work is done here and
 * the argument is ignored.
 */
static void do_nothing(void *x)
{
	/* Intentionally empty. */
}
954+
955+
static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
956+
{
957+
int thr, cpu, pcpu, nthreads;
958+
struct kvm_vcpu *v;
959+
unsigned long dpdes;
960+
961+
nthreads = vcpu->kvm->arch.emul_smt_mode;
962+
dpdes = 0;
963+
cpu = vcpu->vcpu_id & ~(nthreads - 1);
964+
for (thr = 0; thr < nthreads; ++thr, ++cpu) {
965+
v = kvmppc_find_vcpu(vcpu->kvm, cpu);
966+
if (!v)
967+
continue;
968+
/*
969+
* If the vcpu is currently running on a physical cpu thread,
970+
* interrupt it in order to pull it out of the guest briefly,
971+
* which will update its vcore->dpdes value.
972+
*/
973+
pcpu = READ_ONCE(v->cpu);
974+
if (pcpu >= 0)
975+
smp_call_function_single(pcpu, do_nothing, NULL, 1);
976+
if (kvmppc_doorbell_pending(v))
977+
dpdes |= 1 << thr;
978+
}
979+
return dpdes;
980+
}
981+
982+
/*
983+
* On POWER9, emulate doorbell-related instructions in order to
984+
* give the guest the illusion of running on a multi-threaded core.
985+
* The instructions emulated are msgsndp, msgclrp, mfspr TIR,
986+
* and mfspr DPDES.
987+
*/
988+
static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
989+
{
990+
u32 inst, rb, thr;
991+
unsigned long arg;
992+
struct kvm *kvm = vcpu->kvm;
993+
struct kvm_vcpu *tvcpu;
994+
995+
if (!cpu_has_feature(CPU_FTR_ARCH_300))
996+
return EMULATE_FAIL;
997+
if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
998+
return RESUME_GUEST;
999+
if (get_op(inst) != 31)
1000+
return EMULATE_FAIL;
1001+
rb = get_rb(inst);
1002+
thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
1003+
switch (get_xop(inst)) {
1004+
case OP_31_XOP_MSGSNDP:
1005+
arg = kvmppc_get_gpr(vcpu, rb);
1006+
if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
1007+
break;
1008+
arg &= 0x3f;
1009+
if (arg >= kvm->arch.emul_smt_mode)
1010+
break;
1011+
tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
1012+
if (!tvcpu)
1013+
break;
1014+
if (!tvcpu->arch.doorbell_request) {
1015+
tvcpu->arch.doorbell_request = 1;
1016+
kvmppc_fast_vcpu_kick_hv(tvcpu);
1017+
}
1018+
break;
1019+
case OP_31_XOP_MSGCLRP:
1020+
arg = kvmppc_get_gpr(vcpu, rb);
1021+
if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
1022+
break;
1023+
vcpu->arch.vcore->dpdes = 0;
1024+
vcpu->arch.doorbell_request = 0;
1025+
break;
1026+
case OP_31_XOP_MFSPR:
1027+
switch (get_sprn(inst)) {
1028+
case SPRN_TIR:
1029+
arg = thr;
1030+
break;
1031+
case SPRN_DPDES:
1032+
arg = kvmppc_read_dpdes(vcpu);
1033+
break;
1034+
default:
1035+
return EMULATE_FAIL;
1036+
}
1037+
kvmppc_set_gpr(vcpu, get_rt(inst), arg);
1038+
break;
1039+
default:
1040+
return EMULATE_FAIL;
1041+
}
1042+
kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
1043+
return RESUME_GUEST;
1044+
}
1045+
9401046
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
9411047
struct task_struct *tsk)
9421048
{
@@ -1059,12 +1165,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
10591165
break;
10601166
/*
10611167
* This occurs if the guest (kernel or userspace), does something that
1062-
* is prohibited by HFSCR. We just generate a program interrupt to
1063-
* the guest.
1168+
* is prohibited by HFSCR.
1169+
* On POWER9, this could be a doorbell instruction that we need
1170+
* to emulate.
1171+
* Otherwise, we just generate a program interrupt to the guest.
10641172
*/
10651173
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
1066-
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1067-
r = RESUME_GUEST;
1174+
r = EMULATE_FAIL;
1175+
if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
1176+
r = kvmppc_emulate_doorbell_instr(vcpu);
1177+
if (r == EMULATE_FAIL) {
1178+
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1179+
r = RESUME_GUEST;
1180+
}
10681181
break;
10691182
case BOOK3S_INTERRUPT_HV_RM_HARD:
10701183
r = RESUME_PASSTHROUGH;
@@ -1826,10 +1939,14 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
18261939
* This value is only used on POWER9.
18271940
* On POWER9 DD1, TM doesn't work, so we make sure to
18281941
* prevent the guest from using it.
1942+
* On POWER9, we want to virtualize the doorbell facility, so we
1943+
* turn off the HFSCR bit, which causes those instructions to trap.
18291944
*/
18301945
vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
18311946
if (!cpu_has_feature(CPU_FTR_TM))
18321947
vcpu->arch.hfscr &= ~HFSCR_TM;
1948+
if (cpu_has_feature(CPU_FTR_ARCH_300))
1949+
vcpu->arch.hfscr &= ~HFSCR_MSGP;
18331950

18341951
kvmppc_mmu_book3s_hv_init(vcpu);
18351952

@@ -1880,6 +1997,7 @@ static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
18801997
unsigned long flags)
18811998
{
18821999
int err;
2000+
int esmt = 0;
18832001

18842002
if (flags)
18852003
return -EINVAL;
@@ -1897,12 +2015,14 @@ static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
18972015
* On POWER9, the threading mode is "loose",
18982016
* so each vcpu gets its own vcore.
18992017
*/
2018+
esmt = smt_mode;
19002019
smt_mode = 1;
19012020
}
19022021
mutex_lock(&kvm->lock);
19032022
err = -EBUSY;
19042023
if (!kvm->arch.online_vcores) {
19052024
kvm->arch.smt_mode = smt_mode;
2025+
kvm->arch.emul_smt_mode = esmt;
19062026
err = 0;
19072027
}
19082028
mutex_unlock(&kvm->lock);
@@ -2025,10 +2145,6 @@ static void kvmppc_release_hwthread(int cpu)
20252145
tpaca->kvm_hstate.kvm_split_mode = NULL;
20262146
}
20272147

2028-
static void do_nothing(void *x)
2029-
{
2030-
}
2031-
20322148
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
20332149
{
20342150
int i;
@@ -3600,6 +3716,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
36003716
kvm->arch.smt_mode = threads_per_subcore;
36013717
else
36023718
kvm->arch.smt_mode = 1;
3719+
kvm->arch.emul_smt_mode = 1;
36033720

36043721
/*
36053722
* Create a debugfs directory for the VM

arch/powerpc/kvm/book3s_hv_rmhandlers.S

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
10691069
mr r9, r4
10701070
bl kvmppc_msr_interrupt
10711071
5:
1072+
BEGIN_FTR_SECTION
1073+
b fast_guest_return
1074+
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1075+
/* On POWER9, check for pending doorbell requests */
1076+
lbz r0, VCPU_DBELL_REQ(r4)
1077+
cmpwi r0, 0
1078+
beq fast_guest_return
1079+
ld r5, HSTATE_KVM_VCORE(r13)
1080+
/* Set DPDES register so the CPU will take a doorbell interrupt */
1081+
li r0, 1
1082+
mtspr SPRN_DPDES, r0
1083+
std r0, VCORE_DPDES(r5)
1084+
/* Make sure other cpus see vcore->dpdes set before dbell req clear */
1085+
lwsync
1086+
/* Clear the pending doorbell request */
1087+
li r0, 0
1088+
stb r0, VCPU_DBELL_REQ(r4)
10721089

10731090
/*
10741091
* Required state:

arch/powerpc/kvm/powerpc.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -554,9 +554,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
554554
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
555555
case KVM_CAP_PPC_SMT:
556556
r = 0;
557-
if (kvm)
558-
r = kvm->arch.smt_mode;
559-
else if (hv_enabled) {
557+
if (kvm) {
558+
if (kvm->arch.emul_smt_mode > 1)
559+
r = kvm->arch.emul_smt_mode;
560+
else
561+
r = kvm->arch.smt_mode;
562+
} else if (hv_enabled) {
560563
if (cpu_has_feature(CPU_FTR_ARCH_300))
561564
r = 1;
562565
else

0 commit comments

Comments
 (0)