Skip to content

Commit 5304b8d

Browse files
Xiao Guangrong authored and Gleb Natapov committed
KVM: MMU: fast invalidate all pages
The current kvm_mmu_zap_all is really slow: it holds mmu-lock while it walks and zaps all shadow pages one by one, and it also needs to zap every guest page's rmap and every shadow page's parent spte list. Things get worse as the guest uses more memory or vcpus, so it does not scale well.

In this patch, we introduce a faster way to invalidate all shadow pages. KVM maintains a global mmu invalid generation-number, stored in kvm->arch.mmu_valid_gen, and every shadow page stores the current global generation-number in sp->mmu_valid_gen when it is created.

When KVM needs to zap all shadow pages' sptes, it simply increases the global generation-number and then reloads the root shadow pages on all vcpus. Each vcpu will create a new shadow page table according to the current kvm generation-number, which ensures the old pages are not used any more. The obsolete pages (sp->mmu_valid_gen != kvm->arch.mmu_valid_gen) are then zapped using the lock-break technique.

Signed-off-by: Xiao Guangrong <[email protected]>
Reviewed-by: Marcelo Tosatti <[email protected]>
Signed-off-by: Gleb Natapov <[email protected]>
1 parent a2ae162 commit 5304b8d

File tree

3 files changed

+93
-0
lines changed

3 files changed

+93
-0
lines changed

arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ struct kvm_mmu_page {
222222
int root_count; /* Currently serving as active root */
223223
unsigned int unsync_children;
224224
unsigned long parent_ptes; /* Reverse mapping for parent_pte */
225+
unsigned long mmu_valid_gen;
225226
DECLARE_BITMAP(unsync_child_bitmap, 512);
226227

227228
#ifdef CONFIG_X86_32
@@ -529,6 +530,7 @@ struct kvm_arch {
529530
unsigned int n_requested_mmu_pages;
530531
unsigned int n_max_mmu_pages;
531532
unsigned int indirect_shadow_pages;
533+
unsigned long mmu_valid_gen;
532534
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
533535
/*
534536
* Hash table of struct kvm_mmu_page.

arch/x86/kvm/mmu.c

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,6 +1511,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
15111511
if (!direct)
15121512
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
15131513
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
1514+
1515+
/*
1516+
* The active_mmu_pages list is a FIFO list; do not move the
1517+
* page until it is zapped. kvm_zap_obsolete_pages depends on
1518+
* this feature. See the comments in kvm_zap_obsolete_pages().
1519+
*/
15141520
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
15151521
sp->parent_ptes = 0;
15161522
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
@@ -1838,6 +1844,11 @@ static void clear_sp_write_flooding_count(u64 *spte)
18381844
__clear_sp_write_flooding_count(sp);
18391845
}
18401846

1847+
/*
* Report whether shadow page @sp is obsolete, i.e. whether the generation
* number recorded in sp->mmu_valid_gen differs from the VM-wide
* kvm->arch.mmu_valid_gen. The mismatch is hinted to the compiler as the
* unlikely case.
*/
static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
1848+
{
1849+
return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
1850+
}
1851+
18411852
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
18421853
gfn_t gfn,
18431854
gva_t gaddr,
@@ -1900,6 +1911,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
19001911

19011912
account_shadowed(vcpu->kvm, gfn);
19021913
}
1914+
sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
19031915
init_shadow_page_table(sp);
19041916
trace_kvm_mmu_get_page(sp, true);
19051917
return sp;
@@ -2070,8 +2082,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
20702082
ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
20712083
kvm_mmu_page_unlink_children(kvm, sp);
20722084
kvm_mmu_unlink_parents(kvm, sp);
2085+
20732086
if (!sp->role.invalid && !sp->role.direct)
20742087
unaccount_shadowed(kvm, sp->gfn);
2088+
20752089
if (sp->unsync)
20762090
kvm_unlink_unsync_page(kvm, sp);
20772091
if (!sp->root_count) {
@@ -4195,6 +4209,82 @@ void kvm_mmu_zap_all(struct kvm *kvm)
41954209
spin_unlock(&kvm->mmu_lock);
41964210
}
41974211

4212+
/*
* Zap every obsolete shadow page on kvm->arch.active_mmu_pages.
*
* The list is walked in reverse and the walk stops at the first page
* that is not obsolete. Pages already marked invalid are skipped. Zapped
* pages are collected on a local invalid_list and committed with
* kvm_mmu_commit_zap_page(), both at the end and before every lock break.
*
* kvm->mmu_lock is expected to be held on entry (the visible caller,
* kvm_mmu_invalidate_zap_all_pages(), takes it); it may be dropped and
* re-taken via cond_resched_lock(), after which the walk restarts.
*/
static void kvm_zap_obsolete_pages(struct kvm *kvm)
4213+
{
4214+
struct kvm_mmu_page *sp, *node;
4215+
LIST_HEAD(invalid_list);
4216+
4217+
restart:
4218+
list_for_each_entry_safe_reverse(sp, node,
4219+
&kvm->arch.active_mmu_pages, link) {
4220+
/*
4221+
* No obsolete page exists before a newly created page, since
4222+
* active_mmu_pages is a FIFO list.
4223+
*/
4224+
if (!is_obsolete_sp(kvm, sp))
4225+
break;
4226+
4227+
/*
4228+
* Do not repeatedly zap a root page to avoid unnecessary
4229+
* KVM_REQ_MMU_RELOAD, otherwise we may not be able to
4230+
* progress:
4231+
*    vcpu 0                        vcpu 1
4232+
*                         call vcpu_enter_guest():
4233+
*                            1): handle KVM_REQ_MMU_RELOAD
4234+
*                                and require mmu-lock to
4235+
*                                load mmu
4236+
* repeat:
4237+
*    1): zap root page and
4238+
*        send KVM_REQ_MMU_RELOAD
4239+
*
4240+
*    2): if (cond_resched_lock(mmu-lock))
4241+
*
4242+
*                            2): hold mmu-lock and load mmu
4243+
*
4244+
*                            3): see KVM_REQ_MMU_RELOAD bit
4245+
*                                on vcpu->requests is set
4246+
*                                then return 1 to call
4247+
*                                vcpu_enter_guest() again.
4248+
*            goto repeat;
4249+
*
4250+
* Since we are walking the list in reverse and the invalid
4251+
* list will be moved to the head, skipping invalid pages
4252+
* helps us avoid walking the list forever.
4253+
*/
4254+
if (sp->role.invalid)
4255+
continue;
4256+
4257+
/*
* Lock break: commit what has been collected so far before
* possibly yielding mmu_lock, then start the walk over.
*/
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
4258+
kvm_mmu_commit_zap_page(kvm, &invalid_list);
4259+
cond_resched_lock(&kvm->mmu_lock);
4260+
goto restart;
4261+
}
4262+
4263+
/*
* NOTE(review): a non-zero return presumably means additional
* pages were zapped, so the saved next pointer can no longer
* be trusted - confirm against kvm_mmu_prepare_zap_page().
*/
if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
4264+
goto restart;
4265+
}
4266+
4267+
kvm_mmu_commit_zap_page(kvm, &invalid_list);
4268+
}
4269+
4270+
/*
4271+
* Fast invalidate all shadow pages and use lock-break technique
4272+
* to zap obsolete pages.
4273+
*
4274+
* This is required when a memslot is being deleted or the VM is
4275+
* being destroyed: in these cases we must ensure that the KVM MMU
4276+
* does not use any resource of the being-deleted slot (or of any
4277+
* slot) after this function has been called.
4278+
*/
4279+
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
4280+
{
4281+
spin_lock(&kvm->mmu_lock);
4282+
/*
* Bump the global generation: every shadow page stamped with the
* previous value now fails the is_obsolete_sp() comparison.
*/
kvm->arch.mmu_valid_gen++;
4283+
4284+
/* Zap the now-obsolete pages while still holding mmu_lock. */
kvm_zap_obsolete_pages(kvm);
4285+
spin_unlock(&kvm->mmu_lock);
4286+
}
4287+
41984288
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
41994289
{
42004290
struct kvm_mmu_page *sp, *node;

arch/x86/kvm/mmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
9797
return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
9898
}
9999

100+
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
100101
#endif

0 commit comments

Comments
 (0)