@@ -197,6 +197,47 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mmio_mask;
+
+static void mmu_spte_set(u64 *sptep, u64 spte);
+
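+/*
+ * Let the caller choose which reserved-bit pattern is used to tag an
+ * spte as an MMIO spte (see mark_mmio_spte() below).
+ */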
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
+{
+	shadow_mmio_mask = mmio_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+
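+/*
+ * Cache the gfn and the write/user access bits directly in the spte,
+ * together with shadow_mmio_mask, so a later MMIO fault can be handled
+ * without walking the guest page tables again.
+ */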
+static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
+{
+	access &= ACC_WRITE_MASK | ACC_USER_MASK;
+
+	mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
+}
+
+static bool is_mmio_spte(u64 spte)
+{
+	return (spte & shadow_mmio_mask) == shadow_mmio_mask;
+}
+
+static gfn_t get_mmio_spte_gfn(u64 spte)
+{
+	return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT;
+}
+
+static unsigned get_mmio_spte_access(u64 spte)
+{
+	return (spte & ~shadow_mmio_mask) & ~PAGE_MASK;
+}
+
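+/*
+ * Returns true if the pfn has no memslot behind it, i.e. the gfn is
+ * MMIO; in that case an MMIO spte is installed instead of a normal
+ * mapping.
+ */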
+static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access)
+{
+	if (unlikely(is_noslot_pfn(pfn))) {
+		mark_mmio_spte(sptep, gfn, access);
+		return true;
+	}
+
+	return false;
+}
 
 static inline u64 rsvd_bits(int s, int e)
 {
@@ -226,7 +267,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
 
 static int is_shadow_present_pte(u64 pte)
 {
-	return pte & PT_PRESENT_MASK;
+	return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
 }
 
 static int is_large_pte(u64 pte)
@@ -285,6 +326,12 @@ static u64 __get_spte_lockless(u64 *sptep)
 {
 	return ACCESS_ONCE(*sptep);
 }
+
+static bool __check_direct_spte_mmio_pf(u64 spte)
+{
+	/* It is valid if the spte is zapped. */
+	return spte == 0ull;
+}
 #else
 union split_spte {
 	struct {
@@ -388,6 +435,23 @@ static u64 __get_spte_lockless(u64 *sptep)
 
 	return spte.spte;
 }
+
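+/*
+ * A 32-bit host writes an spte in two halves, so a concurrently zapped
+ * MMIO spte may be seen with the low half already cleared while the
+ * high half still carries the mmio mask bits.
+ */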
+static bool __check_direct_spte_mmio_pf(u64 spte)
+{
+	union split_spte sspte = (union split_spte)spte;
+	u32 high_mmio_mask = shadow_mmio_mask >> 32;
+
+	/* It is valid if the spte is zapped. */
+	if (spte == 0ull)
+		return true;
+
+	/* It is valid if the spte is being zapped. */
+	if (sspte.spte_low == 0ull &&
+	    (sspte.spte_high & high_mmio_mask) == high_mmio_mask)
+		return true;
+
+	return false;
+}
 #endif
 
 static bool spte_has_volatile_bits(u64 spte)
@@ -1745,7 +1809,8 @@ static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			drop_parent_pte(child, spte);
 		}
-	}
+	} else if (is_mmio_spte(pte))
+		mmu_spte_clear_no_track(spte);
 
 	if (is_large_pte(pte))
 		--kvm->stat.lpages;
@@ -2120,6 +2185,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	u64 spte, entry = *sptep;
 	int ret = 0;
 
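+	/*
+	 * A gfn without a memslot is MMIO: cache an MMIO spte and skip
+	 * the normal mapping below.
+	 */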
+	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
+		return 0;
+
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
 	 * whether the guest actually used the pte (in order to detect
@@ -2255,6 +2323,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		kvm_mmu_flush_tlb(vcpu);
 	}
 
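+	/* An MMIO spte can only be handled by emulating the access. */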
+	if (unlikely(is_mmio_spte(*sptep) && emulate))
+		*emulate = 1;
+
 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
 		 is_large_pte(*sptep)? "2MB" : "4kB",
@@ -2481,7 +2552,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 
 static bool mmu_invalid_pfn(pfn_t pfn)
 {
-	return unlikely(is_invalid_pfn(pfn) || is_noslot_pfn(pfn));
+	return unlikely(is_invalid_pfn(pfn));
 }
 
 static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
@@ -2495,11 +2566,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 		goto exit;
 	}
 
-	if (unlikely(is_noslot_pfn(pfn))) {
+	if (unlikely(is_noslot_pfn(pfn)))
 		vcpu_cache_mmio_info(vcpu, gva, gfn, access);
-		*ret_val = 1;
-		goto exit;
-	}
 
 	ret = false;
 exit:
@@ -2813,13 +2881,103 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
 	return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
 }
 
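+/*
+ * Fast path: check whether the faulting address matches the MMIO info
+ * cached by a previous fault (see vcpu_cache_mmio_info()).
+ */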
+static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+	if (direct)
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
+
+/*
+ * On direct hosts, the last spte only allows two states
+ * for an mmio page fault:
+ * - It is the mmio spte
+ * - It is zapped or it is being zapped.
+ *
+ * This function completely checks the spte when the last spte
+ * is not the mmio spte.
+ */
+static bool check_direct_spte_mmio_pf(u64 spte)
+{
+	return __check_direct_spte_mmio_pf(spte);
+}
+
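+/*
+ * Walk the shadow page table locklessly and return the last spte
+ * reached for @addr.
+ */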
+static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+{
+	struct kvm_shadow_walk_iterator iterator;
+	u64 spte = 0ull;
+
+	walk_shadow_page_lockless_begin(vcpu);
+	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+		if (!is_shadow_present_pte(spte))
+			break;
+	walk_shadow_page_lockless_end(vcpu);
+
+	return spte;
+}
+
+/*
+ * If it is a real mmio page fault, return 1 and emulate the instruction
+ * directly; return 0 to let the CPU fault again on the address; -1 is
+ * returned if a bug is detected.
+ */
+int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+{
+	u64 spte;
+
+	if (quickly_check_mmio_pf(vcpu, addr, direct))
+		return 1;
+
+	spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+
+	if (is_mmio_spte(spte)) {
+		gfn_t gfn = get_mmio_spte_gfn(spte);
+		unsigned access = get_mmio_spte_access(spte);
+
+		if (direct)
+			addr = 0;
+		vcpu_cache_mmio_info(vcpu, addr, gfn, access);
+		return 1;
+	}
+
+	/*
+	 * It's ok if the gva is remapped by other cpus on a shadow guest,
+	 * but it's a BUG if the gfn is not an mmio page.
+	 */
+	if (direct && !check_direct_spte_mmio_pf(spte))
+		return -1;
+
+	/*
+	 * If the page table is zapped by other cpus, let the CPU fault
+	 * again on the address.
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common);
+
+static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
+				  u32 error_code, bool direct)
+{
+	int ret;
+
+	ret = handle_mmio_page_fault_common(vcpu, addr, direct);
+	WARN_ON(ret < 0);
+	return ret;
+}
+
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 				u32 error_code, bool prefault)
 {
 	gfn_t gfn;
 	int r;
 
 	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
+
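+	/*
+	 * A reserved-bit violation means the access hit an spte with
+	 * reserved bits set, which is how MMIO sptes are tagged.
+	 */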
+	if (unlikely(error_code & PFERR_RSVD_MASK))
+		return handle_mmio_page_fault(vcpu, gva, error_code, true);
+
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		return r;
@@ -2896,6 +3054,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
+	if (unlikely(error_code & PFERR_RSVD_MASK))
+		return handle_mmio_page_fault(vcpu, gpa, error_code, true);
+
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		return r;
@@ -2993,6 +3154,23 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
 	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
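+/*
+ * Keep an existing MMIO spte in sync with the guest pte: drop it if
+ * the gfn changed, otherwise refresh the cached access bits.
+ */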
+static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
+			   int *nr_present)
+{
+	if (unlikely(is_mmio_spte(*sptep))) {
+		if (gfn != get_mmio_spte_gfn(*sptep)) {
+			mmu_spte_clear_no_track(sptep);
+			return true;
+		}
+
+		(*nr_present)++;
+		mark_mmio_spte(sptep, gfn, access);
+		return true;
+	}
+
+	return false;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE