Commit 7d76856

Merge branch 'kvm-pi-fix-lockdep' into HEAD

2 parents: b6262dd + c0b8dca
1 file changed

arch/x86/kvm/vmx/posted_intr.c

Lines changed: 30 additions & 7 deletions
@@ -31,6 +31,8 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
  */
 static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
+#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING
+
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
         return &(to_vmx(vcpu)->pi_desc);
@@ -89,9 +91,20 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
          * current pCPU if the task was migrated.
          */
         if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
-                raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+                raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu);
+
+                /*
+                 * In addition to taking the wakeup lock for the regular/IRQ
+                 * context, tell lockdep it is being taken for the "sched out"
+                 * context as well.  vCPU loads happen in task context, and
+                 * this is taking the lock of the *previous* CPU, i.e. can race
+                 * with both the scheduler and the wakeup handler.
+                 */
+                raw_spin_lock(spinlock);
+                spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_);
                 list_del(&vmx->pi_wakeup_list);
-                raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+                spin_release(&spinlock->dep_map, _RET_IP_);
+                raw_spin_unlock(spinlock);
         }
 
         dest = cpu_physical_id(cpu);
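
Taken out of its KVM context, the locking added to vmx_vcpu_pi_load() is a general lockdep pattern: the lock is acquired normally, and lockdep is additionally told, via spin_acquire()/spin_release() on the lock's dep_map, that the same critical section also counts for a nested "sched out" subclass. A minimal sketch of that pattern follows; it is not code from the commit, and the demo_lock / demo_del_from_prev_cpu / DEMO_LOCK_SCHED_OUT names are made up for illustration:

/* Sketch only: annotate one critical section for a second lockdep subclass. */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

#define DEMO_LOCK_SCHED_OUT	SINGLE_DEPTH_NESTING

static DEFINE_PER_CPU(raw_spinlock_t, demo_lock);	/* assumed initialized elsewhere */

static void demo_del_from_prev_cpu(struct list_head *node, int prev_cpu)
{
	raw_spinlock_t *lock = &per_cpu(demo_lock, prev_cpu);

	/* Real acquisition, tracked by lockdep in the normal subclass... */
	raw_spin_lock(lock);

	/*
	 * ...plus a lockdep-only acquisition in the nested subclass, so the
	 * lock class is validated for both the task-context caller and the
	 * sched-out user without taking the lock twice.
	 */
	spin_acquire(&lock->dep_map, DEMO_LOCK_SCHED_OUT, 0, _RET_IP_);

	list_del(node);

	/* Drop the lockdep-only annotation before the real unlock. */
	spin_release(&lock->dep_map, _RET_IP_);
	raw_spin_unlock(lock);
}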
@@ -148,11 +161,23 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         struct pi_desc old, new;
-        unsigned long flags;
 
-        local_irq_save(flags);
+        lockdep_assert_irqs_disabled();
 
-        raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+        /*
+         * Acquire the wakeup lock using the "sched out" context to work around
+         * a lockdep false positive.  When this is called, schedule() holds
+         * various per-CPU scheduler locks.  When the wakeup handler runs, it
+         * holds this CPU's wakeup lock while calling try_to_wake_up(), which
+         * can eventually take the aforementioned scheduler locks, which causes
+         * lockdep to assume there is deadlock.
+         *
+         * Deadlock can't actually occur because IRQs are disabled for the
+         * entirety of the sched_out critical section, i.e. the wakeup handler
+         * can't run while the scheduler locks are held.
+         */
+        raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu),
+                             PI_LOCK_SCHED_OUT);
         list_add_tail(&vmx->pi_wakeup_list,
                       &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
         raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
@@ -176,8 +201,6 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
          */
         if (pi_test_on(&new))
                 __apic_send_IPI_self(POSTED_INTR_WAKEUP_VECTOR);
-
-        local_irq_restore(flags);
 }
 
 static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
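
On the sched-out side (pi_enable_wakeup_handler() above), the same subclass is applied directly at acquisition time via raw_spin_lock_nested(), and the old local_irq_save()/local_irq_restore() pair becomes a lockdep_assert_irqs_disabled() because, per the new comment, IRQs are already disabled for the entire sched_out critical section. A sketch of that half of the pattern, again with illustrative demo_* names rather than the KVM ones:

/* Sketch only: the sched-out path takes the local CPU's lock under the nested subclass. */
#include <linux/irqflags.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

#define DEMO_LOCK_SCHED_OUT	SINGLE_DEPTH_NESTING

static DEFINE_PER_CPU(raw_spinlock_t, demo_lock);	/* assumed initialized elsewhere */
static DEFINE_PER_CPU(struct list_head, demo_list);	/* assumed initialized elsewhere */

static void demo_add_on_sched_out(struct list_head *node, int cpu)
{
	/* The caller (a sched-out hook) is expected to run with IRQs disabled. */
	lockdep_assert_irqs_disabled();

	/*
	 * The nested subclass keeps lockdep from chaining this acquisition to
	 * the scheduler locks the caller holds: with IRQs off, the IRQ-context
	 * user of demo_lock cannot run on this CPU, so no real deadlock exists.
	 */
	raw_spin_lock_nested(&per_cpu(demo_lock, cpu), DEMO_LOCK_SCHED_OUT);
	list_add_tail(node, &per_cpu(demo_list, cpu));
	raw_spin_unlock(&per_cpu(demo_lock, cpu));
}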
