@@ -185,7 +185,6 @@ module_param(ple_window_max, int, S_IRUGO);
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
 
 struct vmcs {
 	u32 revision_id;
@@ -226,7 +225,7 @@ struct shared_msr_entry {
  * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
  * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
  * More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
  * underlying hardware which will be used to run L2.
  * This structure is packed to ensure that its layout is identical across
  * machines (necessary for live migration).
@@ -409,13 +408,6 @@ struct __packed vmcs12 {
  */
 #define VMCS12_SIZE 0x1000
 
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
-	struct list_head list;
-	gpa_t vmptr;
-	struct loaded_vmcs vmcs02;
-};
-
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -440,15 +432,15 @@ struct nested_vmx {
 	 */
 	bool sync_shadow_vmcs;
 
-	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
-	struct list_head vmcs02_pool;
-	int vmcs02_num;
 	bool change_vmcs01_virtual_x2apic_mode;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
+
+	struct loaded_vmcs vmcs02;
+
 	/*
-	 * Guest pages referred to in vmcs02 with host-physical pointers, so
-	 * we must keep them pinned while L2 runs.
+	 * Guest pages referred to in the vmcs02 with host-physical
+	 * pointers, so we must keep them pinned while L2 runs.
 	 */
 	struct page *apic_access_page;
 	struct page *virtual_apic_page;
@@ -6973,94 +6965,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
 	return handle_nop(vcpu);
 }
 
-/*
- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
- * We could reuse a single VMCS for all the L2 guests, but we also want the
- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
- * allows keeping them loaded on the processor, and in the future will allow
- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
- * every entry if they never change.
- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
- *
- * The following functions allocate and free a vmcs02 in this pool.
- */
-
-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmx->nested.current_vmptr) {
-			list_move(&item->list, &vmx->nested.vmcs02_pool);
-			return &item->vmcs02;
-		}
-
-	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
-		/* Recycle the least recently used VMCS. */
-		item = list_last_entry(&vmx->nested.vmcs02_pool,
-				       struct vmcs02_list, list);
-		item->vmptr = vmx->nested.current_vmptr;
-		list_move(&item->list, &vmx->nested.vmcs02_pool);
-		return &item->vmcs02;
-	}
-
-	/* Create a new VMCS */
-	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
-	if (!item)
-		return NULL;
-	item->vmcs02.vmcs = alloc_vmcs();
-	item->vmcs02.shadow_vmcs = NULL;
-	if (!item->vmcs02.vmcs) {
-		kfree(item);
-		return NULL;
-	}
-	loaded_vmcs_init(&item->vmcs02);
-	item->vmptr = vmx->nested.current_vmptr;
-	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num++;
-	return &item->vmcs02;
-}
-
-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmptr) {
-			free_loaded_vmcs(&item->vmcs02);
-			list_del(&item->list);
-			kfree(item);
-			vmx->nested.vmcs02_num--;
-			return;
-		}
-}
-
-/*
- * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
- * must be &vmx->vmcs01.
- */
-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item, *n;
-
-	WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
-	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
-		/*
-		 * Something will leak if the above WARN triggers. Better than
-		 * a use-after-free.
-		 */
-		if (vmx->loaded_vmcs == &item->vmcs02)
-			continue;
-
-		free_loaded_vmcs(&item->vmcs02);
-		list_del(&item->list);
-		kfree(item);
-		vmx->nested.vmcs02_num--;
-	}
-}
-
 /*
  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
  * set the success or error code of an emulated VMX instruction, as specified
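
The pool removed above was a move-to-front LRU cache keyed by the vmcs12's guest-physical address: a lookup hit was moved to the head of vmx->nested.vmcs02_pool, and once vmcs02_num reached max(VMCS02_POOL_SIZE, 1) the tail entry (the least recently used vmcs02) was recycled in place. With VMCS02_POOL_SIZE fixed at 1, the list never held more than one entry, so the machinery bought nothing over a single preallocated vmcs02. Below is a minimal userspace sketch of the same recycling scheme, for illustration only; struct lru, lru_get and the open-coded links are hypothetical stand-ins for the kernel's list_head helpers (list_move, list_last_entry), not code from this patch.

/*
 * Illustrative sketch of the recycling scheme removed above.
 * struct lru and lru_get are hypothetical; the kernel code used
 * list_head, list_move() and list_last_entry() instead.
 */
#include <stdlib.h>

struct entry {
	struct entry *prev, *next;
	unsigned long key;	/* plays the role of the gpa_t vmptr */
};

struct lru {
	struct entry head;	/* circular sentinel; newest at head.next */
	int num, cap;		/* cap >= 1, like max(VMCS02_POOL_SIZE, 1) */
};

static void lru_init(struct lru *l, int cap)
{
	l->head.next = l->head.prev = &l->head;
	l->num = 0;
	l->cap = cap;
}

static void unlink_entry(struct entry *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

static void link_front(struct lru *l, struct entry *e)
{
	e->next = l->head.next;
	e->prev = &l->head;
	l->head.next->prev = e;
	l->head.next = e;
}

/* Lookup-or-create, mirroring nested_get_current_vmcs02(). */
static struct entry *lru_get(struct lru *l, unsigned long key)
{
	struct entry *e;

	for (e = l->head.next; e != &l->head; e = e->next)
		if (e->key == key) {		/* hit: refresh recency */
			unlink_entry(e);
			link_front(l, e);
			return e;
		}

	if (l->num >= l->cap) {			/* full: recycle the LRU tail */
		e = l->head.prev;
		e->key = key;
		unlink_entry(e);
		link_front(l, e);
		return e;
	}

	e = calloc(1, sizeof(*e));		/* room left: allocate fresh */
	if (!e)
		return NULL;
	e->key = key;
	link_front(l, e);
	l->num++;
	return e;
}

With cap == 1, lru_get() for any new key simply recycles the one existing entry, which is why the commit drops the pool and embeds a single vmcs02 in struct nested_vmx instead.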
@@ -7242,6 +7146,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs *shadow_vmcs;
 
+	vmx->nested.vmcs02.vmcs = alloc_vmcs();
+	vmx->nested.vmcs02.shadow_vmcs = NULL;
+	if (!vmx->nested.vmcs02.vmcs)
+		goto out_vmcs02;
+	loaded_vmcs_init(&vmx->nested.vmcs02);
+
 	if (cpu_has_vmx_msr_bitmap()) {
 		vmx->nested.msr_bitmap =
 			(unsigned long *)__get_free_page(GFP_KERNEL);
@@ -7264,9 +7174,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
 	}
 
-	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num = 0;
-
 	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_REL_PINNED);
 	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7281,6 +7188,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	free_page((unsigned long)vmx->nested.msr_bitmap);
 
 out_msr_bitmap:
+	free_loaded_vmcs(&vmx->nested.vmcs02);
+
+out_vmcs02:
 	return -ENOMEM;
 }
 
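The new out_vmcs02 label follows the kernel's usual goto-unwind convention: resources are acquired top-down and the cleanup labels sit in reverse order at the bottom, so a failure at step N jumps to the label that releases steps N-1 and earlier. Since the vmcs02 is now the first thing enter_vmx_operation() allocates, any later failure falls through free_loaded_vmcs() before returning, while a vmcs02 allocation failure skips straight to the final label. A hypothetical minimal sketch of the pattern; setup_example and its mallocs are illustrative, not from the patch.

#include <errno.h>
#include <stdlib.h>

/* Illustration of the goto-unwind ordering used above. */
static int setup_example(void **a, void **b)
{
	*a = malloc(64);		/* first allocation */
	if (!*a)
		goto out_a;		/* nothing to undo yet */

	*b = malloc(64);		/* second allocation */
	if (!*b)
		goto out_b;		/* must release *a on the way out */

	return 0;

out_b:
	free(*a);			/* unwind in reverse order */
out_a:
	return -ENOMEM;
}
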
@@ -7434,7 +7344,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->vmcs01.shadow_vmcs = NULL;
 	}
 	kfree(vmx->nested.cached_vmcs12);
-	/* Unpin physical memory we referred to in current vmcs02 */
+	/* Unpin physical memory we referred to in the vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_dirty(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
@@ -7450,7 +7360,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->nested.pi_desc = NULL;
 	}
 
-	nested_free_all_saved_vmcss(vmx);
+	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
 /* Emulate the VMXOFF instruction */
@@ -7493,8 +7403,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 		      vmptr + offsetof(struct vmcs12, launch_state),
 		      &zero, sizeof(zero));
 
-	nested_free_vmcs02(vmx, vmptr);
-
 	nested_vmx_succeed(vcpu);
 	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -8406,10 +8314,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 
 	/*
 	 * The host physical addresses of some pages of guest memory
-	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
-	 * may write to these pages via their host physical address while
-	 * L2 is running, bypassing any address-translation-based dirty
-	 * tracking (e.g. EPT write protection).
+	 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+	 * Page). The CPU may write to these pages via their host
+	 * physical address while L2 is running, bypassing any
+	 * address-translation-based dirty tracking (e.g. EPT write
+	 * protection).
 	 *
 	 * Mark them dirty on every exit from L2 to prevent them from
 	 * getting out of sync with dirty tracking.
@@ -10903,20 +10812,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	struct loaded_vmcs *vmcs02;
 	u32 msr_entry_idx;
 	u32 exit_qual;
 
-	vmcs02 = nested_get_current_vmcs02(vmx);
-	if (!vmcs02)
-		return -ENOMEM;
-
 	enter_guest_mode(vcpu);
 
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
-	vmx_switch_vmcs(vcpu, vmcs02);
+	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
 	vmx_segment_cache_clear(vmx);
 
 	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
@@ -11534,10 +11438,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	vm_exit_controls_reset_shadow(vmx);
 	vmx_segment_cache_clear(vmx);
 
-	/* if no vmcs02 cache requested, remove the one we used */
-	if (VMCS02_POOL_SIZE == 0)
-		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
-
 	/* Update any VMCS fields that might have changed while L2 ran */
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);