
Commit add2032

Merge tag 'kvm-x86-fixes-6.15-rcN' of https://github.com/kvm-x86/linux into HEAD
KVM x86 fixes for 6.15-rcN

- Forcibly leave SMM on SHUTDOWN interception on AMD CPUs to avoid causing problems due to KVM stuffing INIT on SHUTDOWN (KVM needs to sanitize the VMCB as its state is undefined after SHUTDOWN; emulating INIT is the least awful choice).

- Track the valid sync/dirty fields in kvm_run as a u64 to ensure KVM doesn't goof a sanity check in the future.

- Free obsolete roots when (re)loading the MMU to fix a bug where pre-faulting memory can get stuck due to always encountering a stale root.

- When dumping GHCB state, use KVM's snapshot instead of the raw GHCB page to print state, so that KVM doesn't print stale/wrong information.

- When changing memory attributes (e.g. shared <=> private), add potential hugepage ranges to the mmu_invalidate_range_{start,end} set so that KVM doesn't create a shared/private hugepage when the corresponding attributes will become mixed (the attributes are committed *after* KVM finishes the invalidation).

- Rework the SRSO mitigation to enable BP_SPEC_REDUCE only when KVM has at least one active VM; setting BP_SPEC_REDUCE whenever KVM is merely loaded led to very measurable performance regressions for non-KVM workloads.
2 parents 36867c0 + e3417ab commit add2032

File tree

7 files changed: +150 -37 lines changed

arch/x86/kvm/mmu.h

Lines changed: 3 additions & 0 deletions
@@ -104,6 +104,9 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
+	if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
+		kvm_mmu_free_obsolete_roots(vcpu);
+
 	/*
 	 * Checking root.hpa is sufficient even when KVM has mirror root.
 	 * We can have either:
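For context, this fix leans on KVM's per-vCPU request machinery: some other path raises KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, and kvm_mmu_reload() now consumes it before trusting root.hpa, so a pre-fault loop can no longer spin on a stale root. kvm_check_request() is a test-and-clear, so the roots are freed exactly once per raised request. A minimal userspace sketch of that raise/consume pattern, with C11 atomics standing in for the kernel helpers (the names below are illustrative, not KVM's):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-in for a vCPU's request bitmap. */
static atomic_uint requests;

#define REQ_FREE_OBSOLETE_ROOTS	(1u << 0)

/* Analogue of kvm_make_request(): set the bit. */
static void make_request(unsigned int req)
{
	atomic_fetch_or(&requests, req);
}

/* Analogue of kvm_check_request(): test-and-clear, true at most once per raise. */
static bool check_request(unsigned int req)
{
	return atomic_fetch_and(&requests, ~req) & req;
}

int main(void)
{
	make_request(REQ_FREE_OBSOLETE_ROOTS);

	if (check_request(REQ_FREE_OBSOLETE_ROOTS))
		printf("freeing obsolete roots before reload\n");	/* runs */

	if (check_request(REQ_FREE_OBSOLETE_ROOTS))
		printf("never reached; the request was already consumed\n");

	return 0;
}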

arch/x86/kvm/mmu/mmu.c

Lines changed: 54 additions & 16 deletions
@@ -5974,6 +5974,7 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
 	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu);
 	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_free_obsolete_roots);
 
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    int *bytes)
@@ -7669,9 +7670,30 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
 }
 
 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+				int level)
+{
+	return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG;
+}
+
+static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+				 int level)
+{
+	lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG;
+}
+
+static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+			       int level)
+{
+	lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
+}
+
 bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
 					struct kvm_gfn_range *range)
 {
+	struct kvm_memory_slot *slot = range->slot;
+	int level;
+
 	/*
 	 * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
 	 * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
@@ -7686,6 +7708,38 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
 	if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
 		return false;
 
+	if (WARN_ON_ONCE(range->end <= range->start))
+		return false;
+
+	/*
+	 * If the head and tail pages of the range currently allow a hugepage,
+	 * i.e. reside fully in the slot and don't have mixed attributes, then
+	 * add each corresponding hugepage range to the ongoing invalidation,
+	 * e.g. to prevent KVM from creating a hugepage in response to a fault
+	 * for a gfn whose attributes aren't changing.  Note, only the range
+	 * of gfns whose attributes are being modified needs to be explicitly
+	 * unmapped, as that will unmap any existing hugepages.
+	 */
+	for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+		gfn_t start = gfn_round_for_level(range->start, level);
+		gfn_t end = gfn_round_for_level(range->end - 1, level);
+		gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
+
+		if ((start != range->start || start + nr_pages > range->end) &&
+		    start >= slot->base_gfn &&
+		    start + nr_pages <= slot->base_gfn + slot->npages &&
+		    !hugepage_test_mixed(slot, start, level))
+			kvm_mmu_invalidate_range_add(kvm, start, start + nr_pages);
+
+		if (end == start)
+			continue;
+
+		if ((end + nr_pages) > range->end &&
+		    (end + nr_pages) <= (slot->base_gfn + slot->npages) &&
+		    !hugepage_test_mixed(slot, end, level))
+			kvm_mmu_invalidate_range_add(kvm, end, end + nr_pages);
+	}
+
 	/* Unmap the old attribute page. */
 	if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE)
 		range->attr_filter = KVM_FILTER_SHARED;
@@ -7695,23 +7749,7 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
 	return kvm_unmap_gfn_range(kvm, range);
 }
 
-static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
-				int level)
-{
-	return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG;
-}
-
-static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
-				 int level)
-{
-	lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG;
-}
 
-static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
-			       int level)
-{
-	lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
-}
 
 static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       gfn_t gfn, int level, unsigned long attrs)
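To see what the head/tail rounding in the new loop computes, here is a small standalone sketch. The helpers below are simplified stand-ins for gfn_round_for_level() and KVM_PAGES_PER_HPAGE() that model only the 2M level (512 4K pages per hugepage), and the gfn range is made up:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

/* Stand-in for KVM_PAGES_PER_HPAGE(PG_LEVEL_2M). */
#define NR_PAGES_2M 512ULL

/* Stand-in for gfn_round_for_level(): round down to a hugepage boundary. */
static gfn_t round_down_2m(gfn_t gfn)
{
	return gfn & ~(NR_PAGES_2M - 1);
}

int main(void)
{
	/* An attribute change covering gfns [1000, 1300). */
	gfn_t range_start = 1000, range_end = 1300;

	gfn_t head = round_down_2m(range_start);	/* 512 */
	gfn_t tail = round_down_2m(range_end - 1);	/* 1024 */

	/*
	 * Both the head hugepage [512, 1024) and the tail hugepage
	 * [1024, 1536) extend beyond the range being changed, so each is
	 * added to the ongoing invalidation (provided it fits in the slot
	 * and isn't already marked mixed).
	 */
	printf("head hugepage: [%llu, %llu)\n",
	       (unsigned long long)head, (unsigned long long)(head + NR_PAGES_2M));
	printf("tail hugepage: [%llu, %llu)\n",
	       (unsigned long long)tail, (unsigned long long)(tail + NR_PAGES_2M));
	return 0;
}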

arch/x86/kvm/smm.c

Lines changed: 1 addition & 0 deletions
@@ -131,6 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
 
 	kvm_mmu_reset_context(vcpu);
 }
+EXPORT_SYMBOL_GPL(kvm_smm_changed);
 
 void process_smi(struct kvm_vcpu *vcpu)
 {

arch/x86/kvm/svm/sev.c

Lines changed: 19 additions & 13 deletions
@@ -3173,9 +3173,14 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
 	kvfree(svm->sev_es.ghcb_sa);
 }
 
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
 static void dump_ghcb(struct vcpu_svm *svm)
 {
-	struct ghcb *ghcb = svm->sev_es.ghcb;
+	struct vmcb_control_area *control = &svm->vmcb->control;
 	unsigned int nbits;
 
 	/* Re-use the dump_invalid_vmcb module parameter */
@@ -3184,18 +3189,24 @@ static void dump_ghcb(struct vcpu_svm *svm)
 		return;
 	}
 
-	nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+	nbits = sizeof(svm->sev_es.valid_bitmap) * 8;
 
-	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+	/*
+	 * Print KVM's snapshot of the GHCB values that were (unsuccessfully)
+	 * used to handle the exit.  If the guest has since modified the GHCB
+	 * itself, dumping the raw GHCB won't help debug why KVM was unable to
+	 * handle the VMGEXIT that KVM observed.
+	 */
+	pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa);
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
-	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+	       kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
-	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+	       control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
-	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+	       control->exit_info_2, kvm_ghcb_sw_exit_info_2_is_valid(svm));
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
-	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
-	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+	       svm->sev_es.sw_scratch, kvm_ghcb_sw_scratch_is_valid(svm));
+	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, svm->sev_es.valid_bitmap);
 }
 
 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
@@ -3266,11 +3277,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
-static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
-{
-	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
-}
-
 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
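The relocated kvm_ghcb_get_sw_exit_code() simply stitches the 64-bit GHCB exit code back together from the VMCB's two 32-bit halves. A tiny standalone illustration of the same bit arithmetic, using made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical contents of the two 32-bit VMCB fields. */
	uint32_t exit_code_hi = 0x00000001;
	uint32_t exit_code    = 0x80000010;

	/* Same composition as kvm_ghcb_get_sw_exit_code(). */
	uint64_t sw_exit_code = (((uint64_t)exit_code_hi) << 32) | exit_code;

	printf("sw_exit_code = %#018llx\n", (unsigned long long)sw_exit_code);
	/* Prints 0x0000000180000010. */
	return 0;
}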

arch/x86/kvm/svm/svm.c

Lines changed: 69 additions & 6 deletions
@@ -607,9 +607,6 @@ static void svm_disable_virtualization_cpu(void)
 	kvm_cpu_svm_disable();
 
 	amd_pmu_disable_virt();
-
-	if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
-		msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
 }
 
 static int svm_enable_virtualization_cpu(void)
@@ -687,9 +684,6 @@ static int svm_enable_virtualization_cpu(void)
 		rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi);
 	}
 
-	if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
-		msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
-
 	return 0;
 }
 
@@ -1518,6 +1512,63 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
 	__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
 }
 
+#ifdef CONFIG_CPU_MITIGATIONS
+static DEFINE_SPINLOCK(srso_lock);
+static atomic_t srso_nr_vms;
+
+static void svm_srso_clear_bp_spec_reduce(void *ign)
+{
+	struct svm_cpu_data *sd = this_cpu_ptr(&svm_data);
+
+	if (!sd->bp_spec_reduce_set)
+		return;
+
+	msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
+	sd->bp_spec_reduce_set = false;
+}
+
+static void svm_srso_vm_destroy(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
+		return;
+
+	if (atomic_dec_return(&srso_nr_vms))
+		return;
+
+	guard(spinlock)(&srso_lock);
+
+	/*
+	 * Verify a new VM didn't come along, acquire the lock, and increment
+	 * the count before this task acquired the lock.
+	 */
+	if (atomic_read(&srso_nr_vms))
+		return;
+
+	on_each_cpu(svm_srso_clear_bp_spec_reduce, NULL, 1);
+}
+
+static void svm_srso_vm_init(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
+		return;
+
+	/*
+	 * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0
+	 * transition, i.e. destroying the last VM, is fully complete, e.g. so
+	 * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs.
+	 */
+	if (atomic_inc_not_zero(&srso_nr_vms))
+		return;
+
+	guard(spinlock)(&srso_lock);
+
+	atomic_inc(&srso_nr_vms);
+}
+#else
+static void svm_srso_vm_init(void) { }
+static void svm_srso_vm_destroy(void) { }
+#endif
+
 static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1550,6 +1601,11 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	    (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
 		kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
 
+	if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) &&
+	    !sd->bp_spec_reduce_set) {
+		sd->bp_spec_reduce_set = true;
+		msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
+	}
 	svm->guest_state_loaded = true;
 }
 
@@ -2231,6 +2287,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
 	 */
 	if (!sev_es_guest(vcpu->kvm)) {
 		clear_page(svm->vmcb);
+#ifdef CONFIG_KVM_SMM
+		if (is_smm(vcpu))
+			kvm_smm_changed(vcpu, false);
+#endif
 		kvm_vcpu_reset(vcpu, true);
 	}
 
@@ -5036,6 +5096,8 @@ static void svm_vm_destroy(struct kvm *kvm)
 {
 	avic_vm_destroy(kvm);
 	sev_vm_destroy(kvm);
+
+	svm_srso_vm_destroy();
 }
 
 static int svm_vm_init(struct kvm *kvm)
@@ -5061,6 +5123,7 @@ static int svm_vm_init(struct kvm *kvm)
 		return ret;
 	}
 
+	svm_srso_vm_init();
 	return 0;
 }
 
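The SRSO rework boils down to a refcount-plus-lock pattern: the common path is a bare atomic op, and the spinlock is taken only around 0 <=> 1 transitions so that tearing down the last VM cannot race with creating the first one. A simplified userspace sketch of the same pattern, assuming pthread spinlocks and C11 atomics as stand-ins for the kernel's spinlock/atomic_t, and a plain flag as a stand-in for the per-CPU MSR bit:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Initialize with pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE) at startup. */
static pthread_spinlock_t lock;
static atomic_int nr_users;
static bool feature_enabled;	/* stand-in for the per-CPU BP_SPEC_REDUCE bit */

/* Userspace analogue of the kernel's atomic_inc_not_zero(). */
static bool inc_not_zero(atomic_int *v)
{
	int cur = atomic_load(v);

	while (cur != 0) {
		if (atomic_compare_exchange_weak(v, &cur, cur + 1))
			return true;
	}
	return false;
}

void feature_user_add(void)		/* analogue of svm_srso_vm_init() */
{
	/* Fast path: piggyback on an existing reference without the lock. */
	if (inc_not_zero(&nr_users))
		return;

	/* 0 => 1: take the lock so a racing last-user teardown finishes first. */
	pthread_spin_lock(&lock);
	atomic_fetch_add(&nr_users, 1);
	pthread_spin_unlock(&lock);
}

void feature_user_remove(void)		/* analogue of svm_srso_vm_destroy() */
{
	if (atomic_fetch_sub(&nr_users, 1) - 1)
		return;

	pthread_spin_lock(&lock);
	/* Recheck: a new user may have arrived before we took the lock. */
	if (atomic_load(&nr_users) == 0)
		feature_enabled = false;	/* kernel: IPI every CPU to clear the MSR bit */
	pthread_spin_unlock(&lock);
}

Note that in the actual patch the MSR bit is set lazily in svm_prepare_switch_to_guest() rather than at VM creation, which is why the enable side above only bumps the count.
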
arch/x86/kvm/svm/svm.h

Lines changed: 2 additions & 0 deletions
@@ -335,6 +335,8 @@ struct svm_cpu_data {
 	u32 next_asid;
 	u32 min_asid;
 
+	bool bp_spec_reduce_set;
+
 	struct vmcb *save_area;
 	unsigned long save_area_pa;
 
arch/x86/kvm/x86.c

Lines changed: 2 additions & 2 deletions
@@ -4597,7 +4597,7 @@ static bool kvm_is_vm_type_supported(unsigned long type)
 	return type < 32 && (kvm_caps.supported_vm_types & BIT(type));
 }
 
-static inline u32 kvm_sync_valid_fields(struct kvm *kvm)
+static inline u64 kvm_sync_valid_fields(struct kvm *kvm)
 {
 	return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS;
 }
@@ -11493,7 +11493,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_queued_exception *ex = &vcpu->arch.exception;
 	struct kvm_run *kvm_run = vcpu->run;
-	u32 sync_valid_fields;
+	u64 sync_valid_fields;
 	int r;
 
 	r = kvm_mmu_post_init_vm(vcpu->kvm);
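Why widen the mask to a u64: kvm_run's kvm_valid_regs/kvm_dirty_regs fields are 64-bit, so with a u32 mask the complement zero-extends and any future sync flag above bit 31 would sail straight past the sanity check in kvm_arch_vcpu_ioctl_run(). A standalone illustration of the truncation; the bit-32 flag is hypothetical, no such KVM_SYNC_X86_* flag exists today:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Pretend userspace sets a future sync flag that lives above bit 31. */
	uint64_t requested = 1ULL << 32;

	uint64_t valid_u64 = 0x7;	/* only bits 0..2 are actually valid */
	uint32_t valid_u32 = 0x7;	/* same mask with the old u32 type */

	/* With a u64 mask, the bogus flag is caught... */
	printf("u64 check flags it: %d\n", (requested & ~valid_u64) != 0);	/* 1 */

	/* ...but ~valid_u32 zero-extends, so bits above 31 vanish from the check. */
	printf("u32 check flags it: %d\n", (requested & ~valid_u32) != 0);	/* 0 */

	return 0;
}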
