Skip to content

Commit de3a002

Browse files
jsmattsonjr authored and bonzini committed
KVM: nVMX: Eliminate vmcs02 pool
The potential performance advantages of a vmcs02 pool have never been
realized. To simplify the code, eliminate the pool. Instead, a single
vmcs02 is allocated per VCPU when the VCPU enters VMX operation.

Cc: [email protected] # prereq for Spectre mitigation
Signed-off-by: Jim Mattson <[email protected]>
Signed-off-by: Mark Kanda <[email protected]>
Reviewed-by: Ameya More <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
Reviewed-by: Paolo Bonzini <[email protected]>
Signed-off-by: Radim Krčmář <[email protected]>
1 parent ba804bb commit de3a002

File tree

1 file changed

+23
-123
lines changed

1 file changed

+23
-123
lines changed

arch/x86/kvm/vmx.c

Lines changed: 23 additions & 123 deletions
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,6 @@ module_param(ple_window_max, int, S_IRUGO);
185185
extern const ulong vmx_return;
186186

187187
#define NR_AUTOLOAD_MSRS 8
188-
#define VMCS02_POOL_SIZE 1
189188

190189
struct vmcs {
191190
u32 revision_id;
@@ -226,7 +225,7 @@ struct shared_msr_entry {
226225
* stored in guest memory specified by VMPTRLD, but is opaque to the guest,
227226
* which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
228227
* More than one of these structures may exist, if L1 runs multiple L2 guests.
229-
* nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
228+
* nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
230229
* underlying hardware which will be used to run L2.
231230
* This structure is packed to ensure that its layout is identical across
232231
* machines (necessary for live migration).
@@ -409,13 +408,6 @@ struct __packed vmcs12 {
409408
*/
410409
#define VMCS12_SIZE 0x1000
411410

412-
/* Used to remember the last vmcs02 used for some recently used vmcs12s */
413-
struct vmcs02_list {
414-
struct list_head list;
415-
gpa_t vmptr;
416-
struct loaded_vmcs vmcs02;
417-
};
418-
419411
/*
420412
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
421413
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -440,15 +432,15 @@ struct nested_vmx {
440432
*/
441433
bool sync_shadow_vmcs;
442434

443-
/* vmcs02_list cache of VMCSs recently used to run L2 guests */
444-
struct list_head vmcs02_pool;
445-
int vmcs02_num;
446435
bool change_vmcs01_virtual_x2apic_mode;
447436
/* L2 must run next, and mustn't decide to exit to L1. */
448437
bool nested_run_pending;
438+
439+
struct loaded_vmcs vmcs02;
440+
449441
/*
450-
* Guest pages referred to in vmcs02 with host-physical pointers, so
451-
* we must keep them pinned while L2 runs.
442+
* Guest pages referred to in the vmcs02 with host-physical
443+
* pointers, so we must keep them pinned while L2 runs.
452444
*/
453445
struct page *apic_access_page;
454446
struct page *virtual_apic_page;
@@ -6973,94 +6965,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
69736965
return handle_nop(vcpu);
69746966
}
69756967

6976-
/*
6977-
* To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
6978-
* We could reuse a single VMCS for all the L2 guests, but we also want the
6979-
* option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
6980-
* allows keeping them loaded on the processor, and in the future will allow
6981-
* optimizations where prepare_vmcs02 doesn't need to set all the fields on
6982-
* every entry if they never change.
6983-
* So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
6984-
* (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
6985-
*
6986-
* The following functions allocate and free a vmcs02 in this pool.
6987-
*/
6988-
6989-
/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
6990-
static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
6991-
{
6992-
struct vmcs02_list *item;
6993-
list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
6994-
if (item->vmptr == vmx->nested.current_vmptr) {
6995-
list_move(&item->list, &vmx->nested.vmcs02_pool);
6996-
return &item->vmcs02;
6997-
}
6998-
6999-
if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
7000-
/* Recycle the least recently used VMCS. */
7001-
item = list_last_entry(&vmx->nested.vmcs02_pool,
7002-
struct vmcs02_list, list);
7003-
item->vmptr = vmx->nested.current_vmptr;
7004-
list_move(&item->list, &vmx->nested.vmcs02_pool);
7005-
return &item->vmcs02;
7006-
}
7007-
7008-
/* Create a new VMCS */
7009-
item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
7010-
if (!item)
7011-
return NULL;
7012-
item->vmcs02.vmcs = alloc_vmcs();
7013-
item->vmcs02.shadow_vmcs = NULL;
7014-
if (!item->vmcs02.vmcs) {
7015-
kfree(item);
7016-
return NULL;
7017-
}
7018-
loaded_vmcs_init(&item->vmcs02);
7019-
item->vmptr = vmx->nested.current_vmptr;
7020-
list_add(&(item->list), &(vmx->nested.vmcs02_pool));
7021-
vmx->nested.vmcs02_num++;
7022-
return &item->vmcs02;
7023-
}
7024-
7025-
/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
7026-
static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
7027-
{
7028-
struct vmcs02_list *item;
7029-
list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
7030-
if (item->vmptr == vmptr) {
7031-
free_loaded_vmcs(&item->vmcs02);
7032-
list_del(&item->list);
7033-
kfree(item);
7034-
vmx->nested.vmcs02_num--;
7035-
return;
7036-
}
7037-
}
7038-
7039-
/*
7040-
* Free all VMCSs saved for this vcpu, except the one pointed by
7041-
* vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
7042-
* must be &vmx->vmcs01.
7043-
*/
7044-
static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
7045-
{
7046-
struct vmcs02_list *item, *n;
7047-
7048-
WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
7049-
list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
7050-
/*
7051-
* Something will leak if the above WARN triggers. Better than
7052-
* a use-after-free.
7053-
*/
7054-
if (vmx->loaded_vmcs == &item->vmcs02)
7055-
continue;
7056-
7057-
free_loaded_vmcs(&item->vmcs02);
7058-
list_del(&item->list);
7059-
kfree(item);
7060-
vmx->nested.vmcs02_num--;
7061-
}
7062-
}
7063-
70646968
/*
70656969
* The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
70666970
* set the success or error code of an emulated VMX instruction, as specified
@@ -7242,6 +7146,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
72427146
struct vcpu_vmx *vmx = to_vmx(vcpu);
72437147
struct vmcs *shadow_vmcs;
72447148

7149+
vmx->nested.vmcs02.vmcs = alloc_vmcs();
7150+
vmx->nested.vmcs02.shadow_vmcs = NULL;
7151+
if (!vmx->nested.vmcs02.vmcs)
7152+
goto out_vmcs02;
7153+
loaded_vmcs_init(&vmx->nested.vmcs02);
7154+
72457155
if (cpu_has_vmx_msr_bitmap()) {
72467156
vmx->nested.msr_bitmap =
72477157
(unsigned long *)__get_free_page(GFP_KERNEL);
@@ -7264,9 +7174,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
72647174
vmx->vmcs01.shadow_vmcs = shadow_vmcs;
72657175
}
72667176

7267-
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
7268-
vmx->nested.vmcs02_num = 0;
7269-
72707177
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
72717178
HRTIMER_MODE_REL_PINNED);
72727179
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7281,6 +7188,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
72817188
free_page((unsigned long)vmx->nested.msr_bitmap);
72827189

72837190
out_msr_bitmap:
7191+
free_loaded_vmcs(&vmx->nested.vmcs02);
7192+
7193+
out_vmcs02:
72847194
return -ENOMEM;
72857195
}
72867196

@@ -7434,7 +7344,7 @@ static void free_nested(struct vcpu_vmx *vmx)
74347344
vmx->vmcs01.shadow_vmcs = NULL;
74357345
}
74367346
kfree(vmx->nested.cached_vmcs12);
7437-
/* Unpin physical memory we referred to in current vmcs02 */
7347+
/* Unpin physical memory we referred to in the vmcs02 */
74387348
if (vmx->nested.apic_access_page) {
74397349
kvm_release_page_dirty(vmx->nested.apic_access_page);
74407350
vmx->nested.apic_access_page = NULL;
@@ -7450,7 +7360,7 @@ static void free_nested(struct vcpu_vmx *vmx)
74507360
vmx->nested.pi_desc = NULL;
74517361
}
74527362

7453-
nested_free_all_saved_vmcss(vmx);
7363+
free_loaded_vmcs(&vmx->nested.vmcs02);
74547364
}
74557365

74567366
/* Emulate the VMXOFF instruction */
@@ -7493,8 +7403,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
74937403
vmptr + offsetof(struct vmcs12, launch_state),
74947404
&zero, sizeof(zero));
74957405

7496-
nested_free_vmcs02(vmx, vmptr);
7497-
74987406
nested_vmx_succeed(vcpu);
74997407
return kvm_skip_emulated_instruction(vcpu);
75007408
}
@@ -8406,10 +8314,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
84068314

84078315
/*
84088316
* The host physical addresses of some pages of guest memory
8409-
* are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
8410-
* may write to these pages via their host physical address while
8411-
* L2 is running, bypassing any address-translation-based dirty
8412-
* tracking (e.g. EPT write protection).
8317+
* are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
8318+
* Page). The CPU may write to these pages via their host
8319+
* physical address while L2 is running, bypassing any
8320+
* address-translation-based dirty tracking (e.g. EPT write
8321+
* protection).
84138322
*
84148323
* Mark them dirty on every exit from L2 to prevent them from
84158324
* getting out of sync with dirty tracking.
@@ -10903,20 +10812,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
1090310812
{
1090410813
struct vcpu_vmx *vmx = to_vmx(vcpu);
1090510814
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10906-
struct loaded_vmcs *vmcs02;
1090710815
u32 msr_entry_idx;
1090810816
u32 exit_qual;
1090910817

10910-
vmcs02 = nested_get_current_vmcs02(vmx);
10911-
if (!vmcs02)
10912-
return -ENOMEM;
10913-
1091410818
enter_guest_mode(vcpu);
1091510819

1091610820
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
1091710821
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
1091810822

10919-
vmx_switch_vmcs(vcpu, vmcs02);
10823+
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
1092010824
vmx_segment_cache_clear(vmx);
1092110825

1092210826
if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
@@ -11534,10 +11438,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
1153411438
vm_exit_controls_reset_shadow(vmx);
1153511439
vmx_segment_cache_clear(vmx);
1153611440

11537-
/* if no vmcs02 cache requested, remove the one we used */
11538-
if (VMCS02_POOL_SIZE == 0)
11539-
nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
11540-
1154111441
/* Update any VMCS fields that might have changed while L2 ran */
1154211442
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
1154311443
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);

0 commit comments

Comments
 (0)