Skip to content

Commit d28b387

Browse files
KarimAllah Ahmed authored and KAGA-KOKO committed
KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL
[ Based on a patch from Ashok Raj <[email protected]> ]

Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach.

To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only start saving and restoring when a non-zero value is written to it.

No attempt is made to handle STIBP here, intentionally. Filtering STIBP may be added in a future patch, which may require trapping all writes if we don't want to pass it through directly to the guest.

[dwmw2: Clean up CPUID bits, save/restore manually, handle reset]

Signed-off-by: KarimAllah Ahmed <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Darren Kenny <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Reviewed-by: Jim Mattson <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jun Nakajima <[email protected]>
Cc: [email protected]
Cc: Dave Hansen <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Asit Mallick <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Cc: Greg KH <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Ashok Raj <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 28c1c9f commit d28b387

File tree

3 files changed

+110
-6
lines changed

3 files changed

+110
-6
lines changed

arch/x86/kvm/cpuid.c

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
367367

368368
/* cpuid 0x80000008.ebx */
369369
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
370-
F(IBPB);
370+
F(IBPB) | F(IBRS);
371371

372372
/* cpuid 0xC0000001.edx */
373373
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -394,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
394394

395395
/* cpuid 7.0.edx*/
396396
const u32 kvm_cpuid_7_0_edx_x86_features =
397-
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(ARCH_CAPABILITIES);
397+
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
398+
F(ARCH_CAPABILITIES);
398399

399400
/* all calls to cpuid_count() should be made on the same cpu */
400401
get_cpu();
@@ -630,9 +631,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
630631
g_phys_as = phys_as;
631632
entry->eax = g_phys_as | (virt_as << 8);
632633
entry->edx = 0;
633-
/* IBPB isn't necessarily present in hardware cpuid */
634+
/* IBRS and IBPB aren't necessarily present in hardware cpuid */
634635
if (boot_cpu_has(X86_FEATURE_IBPB))
635636
entry->ebx |= F(IBPB);
637+
if (boot_cpu_has(X86_FEATURE_IBRS))
638+
entry->ebx |= F(IBRS);
636639
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
637640
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
638641
break;

arch/x86/kvm/vmx.c

Lines changed: 103 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -595,6 +595,7 @@ struct vcpu_vmx {
595595
#endif
596596

597597
u64 arch_capabilities;
598+
u64 spec_ctrl;
598599

599600
u32 vm_entry_controls_shadow;
600601
u32 vm_exit_controls_shadow;
@@ -1910,6 +1911,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
19101911
vmcs_write32(EXCEPTION_BITMAP, eb);
19111912
}
19121913

1914+
/*
1915+
* Check if MSR is intercepted for currently loaded MSR bitmap.
1916+
*/
1917+
static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
1918+
{
1919+
unsigned long *msr_bitmap;
1920+
int f = sizeof(unsigned long);
1921+
1922+
if (!cpu_has_vmx_msr_bitmap())
1923+
return true;
1924+
1925+
msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
1926+
1927+
if (msr <= 0x1fff) {
1928+
return !!test_bit(msr, msr_bitmap + 0x800 / f);
1929+
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
1930+
msr &= 0x1fff;
1931+
return !!test_bit(msr, msr_bitmap + 0xc00 / f);
1932+
}
1933+
1934+
return true;
1935+
}
1936+
19131937
/*
19141938
* Check if MSR is intercepted for L01 MSR bitmap.
19151939
*/
@@ -3262,6 +3286,14 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
32623286
case MSR_IA32_TSC:
32633287
msr_info->data = guest_read_tsc(vcpu);
32643288
break;
3289+
case MSR_IA32_SPEC_CTRL:
3290+
if (!msr_info->host_initiated &&
3291+
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
3292+
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3293+
return 1;
3294+
3295+
msr_info->data = to_vmx(vcpu)->spec_ctrl;
3296+
break;
32653297
case MSR_IA32_ARCH_CAPABILITIES:
32663298
if (!msr_info->host_initiated &&
32673299
!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
@@ -3375,6 +3407,37 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
33753407
case MSR_IA32_TSC:
33763408
kvm_write_tsc(vcpu, msr_info);
33773409
break;
3410+
case MSR_IA32_SPEC_CTRL:
3411+
if (!msr_info->host_initiated &&
3412+
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
3413+
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3414+
return 1;
3415+
3416+
/* The STIBP bit doesn't fault even if it's not advertised */
3417+
if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
3418+
return 1;
3419+
3420+
vmx->spec_ctrl = data;
3421+
3422+
if (!data)
3423+
break;
3424+
3425+
/*
3426+
* For non-nested:
3427+
* When it's written (to non-zero) for the first time, pass
3428+
* it through.
3429+
*
3430+
* For nested:
3431+
* The handling of the MSR bitmap for L2 guests is done in
3432+
* nested_vmx_merge_msr_bitmap. We should not touch the
3433+
* vmcs02.msr_bitmap here since it gets completely overwritten
3434+
* in the merging. We update the vmcs01 here for L1 as well
3435+
* since it will end up touching the MSR anyway now.
3436+
*/
3437+
vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
3438+
MSR_IA32_SPEC_CTRL,
3439+
MSR_TYPE_RW);
3440+
break;
33783441
case MSR_IA32_PRED_CMD:
33793442
if (!msr_info->host_initiated &&
33803443
!guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
@@ -5700,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
57005763
u64 cr0;
57015764

57025765
vmx->rmode.vm86_active = 0;
5766+
vmx->spec_ctrl = 0;
57035767

57045768
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
57055769
kvm_set_cr8(vcpu, 0);
@@ -9371,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
93719435

93729436
vmx_arm_hv_timer(vcpu);
93739437

9438+
/*
9439+
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
9440+
* it's non-zero. Since vmentry is serialising on affected CPUs, there
9441+
* is no need to worry about the conditional branch over the wrmsr
9442+
* being speculatively taken.
9443+
*/
9444+
if (vmx->spec_ctrl)
9445+
wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
9446+
93749447
vmx->__launched = vmx->loaded_vmcs->launched;
93759448
asm(
93769449
/* Store host registers */
@@ -9489,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
94899562
#endif
94909563
);
94919564

9565+
/*
9566+
* We do not use IBRS in the kernel. If this vCPU has used the
9567+
* SPEC_CTRL MSR it may have left it on; save the value and
9568+
* turn it off. This is much more efficient than blindly adding
9569+
* it to the atomic save/restore list. Especially as the former
9570+
* (Saving guest MSRs on vmexit) doesn't even exist in KVM.
9571+
*
9572+
* For non-nested case:
9573+
* If the L01 MSR bitmap does not intercept the MSR, then we need to
9574+
* save it.
9575+
*
9576+
* For nested case:
9577+
* If the L02 MSR bitmap does not intercept the MSR, then we need to
9578+
* save it.
9579+
*/
9580+
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
9581+
rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
9582+
9583+
if (vmx->spec_ctrl)
9584+
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
9585+
94929586
/* Eliminate branch target predictions from guest mode */
94939587
vmexit_fill_RSB();
94949588

@@ -10113,7 +10207,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
1011310207
unsigned long *msr_bitmap_l1;
1011410208
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
1011510209
/*
10116-
* pred_cmd is trying to verify two things:
10210+
* pred_cmd & spec_ctrl are trying to verify two things:
1011710211
*
1011810212
* 1. L0 gave a permission to L1 to actually passthrough the MSR. This
1011910213
* ensures that we do not accidentally generate an L02 MSR bitmap
@@ -10126,9 +10220,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
1012610220
* the MSR.
1012710221
*/
1012810222
bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
10223+
bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
1012910224

1013010225
if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
10131-
!pred_cmd)
10226+
!pred_cmd && !spec_ctrl)
1013210227
return false;
1013310228

1013410229
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
@@ -10162,6 +10257,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
1016210257
}
1016310258
}
1016410259

10260+
if (spec_ctrl)
10261+
nested_vmx_disable_intercept_for_msr(
10262+
msr_bitmap_l1, msr_bitmap_l0,
10263+
MSR_IA32_SPEC_CTRL,
10264+
MSR_TYPE_R | MSR_TYPE_W);
10265+
1016510266
if (pred_cmd)
1016610267
nested_vmx_disable_intercept_for_msr(
1016710268
msr_bitmap_l1, msr_bitmap_l0,

arch/x86/kvm/x86.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1009,7 +1009,7 @@ static u32 msrs_to_save[] = {
10091009
#endif
10101010
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
10111011
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1012-
MSR_IA32_ARCH_CAPABILITIES
1012+
MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
10131013
};
10141014

10151015
static unsigned num_msrs_to_save;

0 commit comments

Comments
 (0)