
Commit b533137

willdeacon authored and bonzini committed
KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():

 | BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
 | in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
 | INFO: lockdep is turned off.
 | CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
 | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
 | Call trace:
 |  dump_backtrace+0x0/0x284
 |  show_stack+0x1c/0x28
 |  dump_stack+0xf0/0x1a4
 |  ___might_sleep+0x2bc/0x2cc
 |  unmap_stage2_range+0x160/0x1ac
 |  kvm_unmap_hva_range+0x1a0/0x1c8
 |  kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
 |  __mmu_notifier_invalidate_range_start+0x218/0x31c
 |  mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
 |  __oom_reap_task_mm+0x128/0x268
 |  oom_reap_task+0xac/0x298
 |  oom_reaper+0x178/0x17c
 |  kthread+0x1e4/0x1fc
 |  ret_from_fork+0x10/0x30

Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.

Cc: <[email protected]>
Fixes: 8b3405e ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <[email protected]>
Cc: Suzuki K Poulose <[email protected]>
Cc: James Morse <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
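To make the gating concrete, here is a minimal standalone userspace C model
of the logic the patch introduces: a range walk that crosses PGD boundaries
only offers to reschedule when the notifier flags carry
MMU_NOTIFIER_RANGE_BLOCKABLE. The constant values and helpers (PGDIR_SIZE,
pgd_addr_end()) are simplified stand-ins for illustration, not the kernel's
definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Standalone model of the gating this patch introduces. The names mirror
 * the kernel's, but the values here are illustrative assumptions. */
#define MMU_NOTIFIER_RANGE_BLOCKABLE	(1u << 0)
#define PGDIR_SIZE			(1ull << 30)	/* illustrative PGD span */

/* Clamp the next PGD boundary to the end of the range, like pgd_addr_end(). */
static uint64_t pgd_addr_end(uint64_t addr, uint64_t end)
{
	uint64_t boundary = (addr + PGDIR_SIZE) & ~(PGDIR_SIZE - 1);

	return boundary < end ? boundary : end;
}

/* Mirrors __unmap_stage2_range(): only offer to reschedule at a PGD
 * boundary when the caller said blocking is allowed. */
static void unmap_range(uint64_t start, uint64_t size, unsigned flags)
{
	bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
	uint64_t addr = start, end = start + size, next;

	do {
		next = pgd_addr_end(addr, end);
		printf("unmap [%#llx, %#llx)\n",
		       (unsigned long long)addr, (unsigned long long)next);
		if (may_block && next != end)
			printf("  crossing a PGD: would cond_resched_lock() here\n");
	} while (addr = next, addr != end);
}

int main(void)
{
	/* Ordinary teardown: blocking allowed, the lock may be dropped. */
	unmap_range(0, 3 * PGDIR_SIZE, MMU_NOTIFIER_RANGE_BLOCKABLE);
	/* oom_reaper-style nonblock notifier: never sleep. */
	unmap_range(0, 3 * PGDIR_SIZE, 0);
	return 0;
}

Run with both flag values, the model shows the blockable walk yielding at
each PGD boundary while the nonblock walk completes without ever sleeping,
which is exactly the BUG() the patch avoids.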
1 parent fdfe7cb commit b533137

1 file changed: +13 -4 lines changed

arch/arm64/kvm/mmu.c

Lines changed: 13 additions & 4 deletions
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
  * destroying the VM), otherwise another faulting VCPU may come in and mess
  * with things behind our backs.
  */
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+				 bool may_block)
 {
 	struct kvm *kvm = mmu->kvm;
 	pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
 		 * If the range is too large, release the kvm->mmu_lock
 		 * to prevent starvation and lockup detector warnings.
 		 */
-		if (next != end)
+		if (may_block && next != end)
 			cond_resched_lock(&kvm->mmu_lock);
 	} while (pgd++, addr = next, addr != end);
 }
 
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+	__unmap_stage2_range(mmu, start, size, true);
+}
+
 static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
 			      phys_addr_t addr, phys_addr_t end)
 {
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
 
 static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
 {
-	unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+	unsigned flags = *(unsigned *)data;
+	bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+	__unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
 	return 0;
 }
 
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
 		return 0;
 
 	trace_kvm_unmap_hva_range(start, end);
-	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
 	return 0;
 }