Skip to content

Commit 890ca9a

Browse files
yhuang-intel authored and avikivity committed
KVM: Add MCE support
The related MSRs are emulated. MCE capability is exported via extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED. A new vcpu ioctl command KVM_X86_SETUP_MCE is used to set up MCE emulation such as the mcg_cap. MCE is injected via vcpu ioctl command KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are not implemented.

Signed-off-by: Huang Ying <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
1 parent af24a4e commit 890ca9a

File tree

4 files changed

+222
-24
lines changed

4 files changed

+222
-24
lines changed

arch/x86/include/asm/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#define __KVM_HAVE_USER_NMI
1818
#define __KVM_HAVE_GUEST_DEBUG
1919
#define __KVM_HAVE_MSIX
20+
#define __KVM_HAVE_MCE
2021

2122
/* Architectural interrupt line count. */
2223
#define KVM_NR_INTERRUPTS 256

arch/x86/include/asm/kvm_host.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,11 @@ struct kvm_vcpu_arch {
373373
unsigned long dr6;
374374
unsigned long dr7;
375375
unsigned long eff_db[KVM_NR_DB_REGS];
376+
377+
u64 mcg_cap;
378+
u64 mcg_status;
379+
u64 mcg_ctl;
380+
u64 *mce_banks;
376381
};
377382

378383
struct kvm_mem_alias {

arch/x86/kvm/x86.c

Lines changed: 196 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <asm/msr.h>
4343
#include <asm/desc.h>
4444
#include <asm/mtrr.h>
45+
#include <asm/mce.h>
4546

4647
#define MAX_IO_MSRS 256
4748
#define CR0_RESERVED_BITS \
@@ -55,6 +56,10 @@
5556
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
5657

5758
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
59+
60+
#define KVM_MAX_MCE_BANKS 32
61+
#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
62+
5863
/* EFER defaults:
5964
* - enable syscall per default because its emulated by KVM
6065
* - enable LME and LMA per default on 64 bit KVM
@@ -777,23 +782,43 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
777782
return 0;
778783
}
779784

780-
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
785+
static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
781786
{
787+
u64 mcg_cap = vcpu->arch.mcg_cap;
788+
unsigned bank_num = mcg_cap & 0xff;
789+
782790
switch (msr) {
783-
case MSR_EFER:
784-
set_efer(vcpu, data);
785-
break;
786-
case MSR_IA32_MC0_STATUS:
787-
pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
788-
__func__, data);
789-
break;
790791
case MSR_IA32_MCG_STATUS:
791-
pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
792-
__func__, data);
792+
vcpu->arch.mcg_status = data;
793793
break;
794794
case MSR_IA32_MCG_CTL:
795-
pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
796-
__func__, data);
795+
if (!(mcg_cap & MCG_CTL_P))
796+
return 1;
797+
if (data != 0 && data != ~(u64)0)
798+
return -1;
799+
vcpu->arch.mcg_ctl = data;
800+
break;
801+
default:
802+
if (msr >= MSR_IA32_MC0_CTL &&
803+
msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
804+
u32 offset = msr - MSR_IA32_MC0_CTL;
805+
/* only 0 or all 1s can be written to IA32_MCi_CTL */
806+
if ((offset & 0x3) == 0 &&
807+
data != 0 && data != ~(u64)0)
808+
return -1;
809+
vcpu->arch.mce_banks[offset] = data;
810+
break;
811+
}
812+
return 1;
813+
}
814+
return 0;
815+
}
816+
817+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
818+
{
819+
switch (msr) {
820+
case MSR_EFER:
821+
set_efer(vcpu, data);
797822
break;
798823
case MSR_IA32_DEBUGCTLMSR:
799824
if (!data) {
@@ -849,6 +874,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
849874
kvm_request_guest_time_update(vcpu);
850875
break;
851876
}
877+
case MSR_IA32_MCG_CTL:
878+
case MSR_IA32_MCG_STATUS:
879+
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
880+
return set_msr_mce(vcpu, msr, data);
852881
default:
853882
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
854883
return 1;
@@ -904,26 +933,49 @@ static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
904933
return 0;
905934
}
906935

907-
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
936+
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
908937
{
909938
u64 data;
939+
u64 mcg_cap = vcpu->arch.mcg_cap;
940+
unsigned bank_num = mcg_cap & 0xff;
910941

911942
switch (msr) {
912-
case 0xc0010010: /* SYSCFG */
913-
case 0xc0010015: /* HWCR */
914-
case MSR_IA32_PLATFORM_ID:
915943
case MSR_IA32_P5_MC_ADDR:
916944
case MSR_IA32_P5_MC_TYPE:
917-
case MSR_IA32_MC0_CTL:
918-
case MSR_IA32_MCG_STATUS:
945+
data = 0;
946+
break;
919947
case MSR_IA32_MCG_CAP:
948+
data = vcpu->arch.mcg_cap;
949+
break;
920950
case MSR_IA32_MCG_CTL:
921-
case MSR_IA32_MC0_MISC:
922-
case MSR_IA32_MC0_MISC+4:
923-
case MSR_IA32_MC0_MISC+8:
924-
case MSR_IA32_MC0_MISC+12:
925-
case MSR_IA32_MC0_MISC+16:
926-
case MSR_IA32_MC0_MISC+20:
951+
if (!(mcg_cap & MCG_CTL_P))
952+
return 1;
953+
data = vcpu->arch.mcg_ctl;
954+
break;
955+
case MSR_IA32_MCG_STATUS:
956+
data = vcpu->arch.mcg_status;
957+
break;
958+
default:
959+
if (msr >= MSR_IA32_MC0_CTL &&
960+
msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
961+
u32 offset = msr - MSR_IA32_MC0_CTL;
962+
data = vcpu->arch.mce_banks[offset];
963+
break;
964+
}
965+
return 1;
966+
}
967+
*pdata = data;
968+
return 0;
969+
}
970+
971+
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
972+
{
973+
u64 data;
974+
975+
switch (msr) {
976+
case 0xc0010010: /* SYSCFG */
977+
case 0xc0010015: /* HWCR */
978+
case MSR_IA32_PLATFORM_ID:
927979
case MSR_IA32_UCODE_REV:
928980
case MSR_IA32_EBL_CR_POWERON:
929981
case MSR_IA32_DEBUGCTLMSR:
@@ -966,6 +1018,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
9661018
case MSR_KVM_SYSTEM_TIME:
9671019
data = vcpu->arch.time;
9681020
break;
1021+
case MSR_IA32_P5_MC_ADDR:
1022+
case MSR_IA32_P5_MC_TYPE:
1023+
case MSR_IA32_MCG_CAP:
1024+
case MSR_IA32_MCG_CTL:
1025+
case MSR_IA32_MCG_STATUS:
1026+
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1027+
return get_msr_mce(vcpu, msr, pdata);
9691028
default:
9701029
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
9711030
return 1;
@@ -1087,6 +1146,9 @@ int kvm_dev_ioctl_check_extension(long ext)
10871146
case KVM_CAP_IOMMU:
10881147
r = iommu_found();
10891148
break;
1149+
case KVM_CAP_MCE:
1150+
r = KVM_MAX_MCE_BANKS;
1151+
break;
10901152
default:
10911153
r = 0;
10921154
break;
@@ -1146,6 +1208,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
11461208
r = 0;
11471209
break;
11481210
}
1211+
case KVM_X86_GET_MCE_CAP_SUPPORTED: {
1212+
u64 mce_cap;
1213+
1214+
mce_cap = KVM_MCE_CAP_SUPPORTED;
1215+
r = -EFAULT;
1216+
if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
1217+
goto out;
1218+
r = 0;
1219+
break;
1220+
}
11491221
default:
11501222
r = -EINVAL;
11511223
}
@@ -1502,6 +1574,80 @@ static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
15021574
return 0;
15031575
}
15041576

1577+
static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
1578+
u64 mcg_cap)
1579+
{
1580+
int r;
1581+
unsigned bank_num = mcg_cap & 0xff, bank;
1582+
1583+
r = -EINVAL;
1584+
if (!bank_num)
1585+
goto out;
1586+
if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
1587+
goto out;
1588+
r = 0;
1589+
vcpu->arch.mcg_cap = mcg_cap;
1590+
/* Init IA32_MCG_CTL to all 1s */
1591+
if (mcg_cap & MCG_CTL_P)
1592+
vcpu->arch.mcg_ctl = ~(u64)0;
1593+
/* Init IA32_MCi_CTL to all 1s */
1594+
for (bank = 0; bank < bank_num; bank++)
1595+
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
1596+
out:
1597+
return r;
1598+
}
1599+
1600+
static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1601+
struct kvm_x86_mce *mce)
1602+
{
1603+
u64 mcg_cap = vcpu->arch.mcg_cap;
1604+
unsigned bank_num = mcg_cap & 0xff;
1605+
u64 *banks = vcpu->arch.mce_banks;
1606+
1607+
if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
1608+
return -EINVAL;
1609+
/*
1610+
* if IA32_MCG_CTL is not all 1s, the uncorrected error
1611+
* reporting is disabled
1612+
*/
1613+
if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
1614+
vcpu->arch.mcg_ctl != ~(u64)0)
1615+
return 0;
1616+
banks += 4 * mce->bank;
1617+
/*
1618+
* if IA32_MCi_CTL is not all 1s, the uncorrected error
1619+
* reporting is disabled for the bank
1620+
*/
1621+
if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
1622+
return 0;
1623+
if (mce->status & MCI_STATUS_UC) {
1624+
if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
1625+
!(vcpu->arch.cr4 & X86_CR4_MCE)) {
1626+
printk(KERN_DEBUG "kvm: set_mce: "
1627+
"injects mce exception while "
1628+
"previous one is in progress!\n");
1629+
set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
1630+
return 0;
1631+
}
1632+
if (banks[1] & MCI_STATUS_VAL)
1633+
mce->status |= MCI_STATUS_OVER;
1634+
banks[2] = mce->addr;
1635+
banks[3] = mce->misc;
1636+
vcpu->arch.mcg_status = mce->mcg_status;
1637+
banks[1] = mce->status;
1638+
kvm_queue_exception(vcpu, MC_VECTOR);
1639+
} else if (!(banks[1] & MCI_STATUS_VAL)
1640+
|| !(banks[1] & MCI_STATUS_UC)) {
1641+
if (banks[1] & MCI_STATUS_VAL)
1642+
mce->status |= MCI_STATUS_OVER;
1643+
banks[2] = mce->addr;
1644+
banks[3] = mce->misc;
1645+
banks[1] = mce->status;
1646+
} else
1647+
banks[1] |= MCI_STATUS_OVER;
1648+
return 0;
1649+
}
1650+
15051651
long kvm_arch_vcpu_ioctl(struct file *filp,
15061652
unsigned int ioctl, unsigned long arg)
15071653
{
@@ -1635,6 +1781,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
16351781
kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
16361782
break;
16371783
}
1784+
case KVM_X86_SETUP_MCE: {
1785+
u64 mcg_cap;
1786+
1787+
r = -EFAULT;
1788+
if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
1789+
goto out;
1790+
r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
1791+
break;
1792+
}
1793+
case KVM_X86_SET_MCE: {
1794+
struct kvm_x86_mce mce;
1795+
1796+
r = -EFAULT;
1797+
if (copy_from_user(&mce, argp, sizeof mce))
1798+
goto out;
1799+
r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
1800+
break;
1801+
}
16381802
default:
16391803
r = -EINVAL;
16401804
}
@@ -4440,6 +4604,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
44404604
goto fail_mmu_destroy;
44414605
}
44424606

4607+
vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
4608+
GFP_KERNEL);
4609+
if (!vcpu->arch.mce_banks) {
4610+
r = -ENOMEM;
4611+
goto fail_mmu_destroy;
4612+
}
4613+
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
4614+
44434615
return 0;
44444616

44454617
fail_mmu_destroy:

include/linux/kvm.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,9 @@ struct kvm_trace_rec {
415415
#define KVM_CAP_ASSIGN_DEV_IRQ 29
416416
/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
417417
#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
418+
#ifdef __KVM_HAVE_MCE
419+
#define KVM_CAP_MCE 31
420+
#endif
418421

419422
#ifdef KVM_CAP_IRQ_ROUTING
420423

@@ -454,6 +457,19 @@ struct kvm_irq_routing {
454457

455458
#endif
456459

460+
#ifdef KVM_CAP_MCE
461+
/* x86 MCE */
462+
/* Argument of the KVM_X86_SET_MCE vcpu ioctl: one machine-check event to inject. */
struct kvm_x86_mce {
463+
__u64 status; /* written to the bank's status slot; must have MCI_STATUS_VAL set */
464+
__u64 addr; /* written to the bank's address slot */
465+
__u64 misc; /* written to the bank's misc slot */
466+
__u64 mcg_status; /* copied to the vcpu's mcg_status when an uncorrected error is injected */
467+
__u8 bank; /* target bank index; must be below the bank count in the vcpu's mcg_cap */
468+
__u8 pad1[7]; /* padding; not read by the injection code shown in this commit */
469+
__u64 pad2[3]; /* padding; not read by the injection code shown in this commit */
470+
};
471+
#endif
472+
457473
/*
458474
* ioctls for VM fds
459475
*/
@@ -541,6 +557,10 @@ struct kvm_irq_routing {
541557
#define KVM_NMI _IO(KVMIO, 0x9a)
542558
/* Available with KVM_CAP_SET_GUEST_DEBUG */
543559
#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
560+
/* MCE for x86 */
561+
#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64)
562+
#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64)
563+
#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce)
544564

545565
/*
546566
* Deprecated interfaces

0 commit comments

Comments
 (0)