Commit 5c0930c

hrtimers: Push pending hrtimers away from outgoing CPU earlier
2b8272f ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug") solved the straight forward CPU hotplug deadlock vs. the scheduler bandwidth timer. Yu discovered a more involved variant where a task which has a bandwidth timer started on the outgoing CPU holds a lock and then gets throttled. If the lock required by one of the CPU hotplug callbacks the hotplug operation deadlocks because the unthrottling timer event is not handled on the dying CPU and can only be recovered once the control CPU reaches the hotplug state which pulls the pending hrtimers from the dead CPU. Solve this by pushing the hrtimers away from the dying CPU in the dying callbacks. Nothing can queue a hrtimer on the dying CPU at that point because all other CPUs spin in stop_machine() with interrupts disabled and once the operation is finished the CPU is marked offline. Reported-by: Yu Liao <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Liu Tie <[email protected]> Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx
1 parent ffc2532 commit 5c0930c
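
The core of the change is the new dying-state callback in kernel/time/hrtimer.c. Assembled from the hunks below, with the unchanged context lines of the function filled back in and a few explanatory comments added, it reads roughly as follows:

int hrtimers_cpu_dying(unsigned int dying_cpu)
{
        struct hrtimer_cpu_base *old_base, *new_base;
        int i, ncpu = cpumask_first(cpu_active_mask);

        /* The outgoing CPU's tick sched timer is not needed anymore. */
        tick_cancel_sched_timer(dying_cpu);

        /* This runs on the dying CPU: "old" is its local base, "new" the target. */
        old_base = this_cpu_ptr(&hrtimer_bases);
        new_base = &per_cpu(hrtimer_bases, ncpu);

        /*
         * The caller is globally serialized and nobody else
         * takes two locks at once, deadlock is not possible.
         */
        raw_spin_lock(&old_base->lock);
        raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);

        /* Move every pending timer over to the first active CPU. */
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                migrate_hrtimer_list(&old_base->clock_base[i],
                                     &new_base->clock_base[i]);
        }

        /*
         * The migration might have changed the first expiring softirq
         * timer on this CPU. Update it.
         */
        __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
        /* Tell the other CPU to retrigger the next event */
        smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);

        raw_spin_unlock(&new_base->lock);
        raw_spin_unlock(&old_base->lock);

        return 0;
}

It is invoked as the teardown callback of the new CPUHP_AP_HRTIMERS_DYING state, i.e. on the outgoing CPU while all other CPUs still spin in stop_machine() with interrupts disabled, so nothing can queue a new hrtimer on the dying CPU while its lists are being moved.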

4 files changed, +22 -24 lines


include/linux/cpuhotplug.h

Lines changed: 1 addition & 0 deletions
@@ -193,6 +193,7 @@ enum cpuhp_state {
         CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
         CPUHP_AP_ARM64_ISNDEP_STARTING,
         CPUHP_AP_SMPCFD_DYING,
+        CPUHP_AP_HRTIMERS_DYING,
         CPUHP_AP_X86_TBOOT_DYING,
         CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
         CPUHP_AP_ONLINE,

include/linux/hrtimer.h

Lines changed: 2 additions & 2 deletions
@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);

 int hrtimers_prepare_cpu(unsigned int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
 #else
-#define hrtimers_dead_cpu	NULL
+#define hrtimers_cpu_dying	NULL
 #endif

 #endif

kernel/cpu.c

Lines changed: 7 additions & 1 deletion
@@ -2098,7 +2098,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
         [CPUHP_HRTIMERS_PREPARE] = {
                 .name                   = "hrtimers:prepare",
                 .startup.single         = hrtimers_prepare_cpu,
-                .teardown.single        = hrtimers_dead_cpu,
+                .teardown.single        = NULL,
         },
         [CPUHP_SMPCFD_PREPARE] = {
                 .name                   = "smpcfd:prepare",
@@ -2190,6 +2190,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
                 .startup.single         = NULL,
                 .teardown.single        = smpcfd_dying_cpu,
         },
+        [CPUHP_AP_HRTIMERS_DYING] = {
+                .name                   = "hrtimers:dying",
+                .startup.single         = NULL,
+                .teardown.single        = hrtimers_cpu_dying,
+        },
+
         /* Entry state on starting. Interrupts enabled from here on. Transient
          * state for synchronsization */
         [CPUHP_AP_ONLINE] = {
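
Read together, the two kernel/cpu.c hunks leave the hrtimer-related entries of cpuhp_hp_states[] looking like this (assembled from the diff above, unrelated entries omitted): the prepare step loses its teardown, and the migration now happens in the new CPUHP_AP_HRTIMERS_DYING step, whose teardown callback runs on the outgoing CPU.

        [CPUHP_HRTIMERS_PREPARE] = {
                .name                   = "hrtimers:prepare",
                .startup.single         = hrtimers_prepare_cpu,
                .teardown.single        = NULL,
        },
        ...
        [CPUHP_AP_HRTIMERS_DYING] = {
                .name                   = "hrtimers:dying",
                .startup.single         = NULL,
                .teardown.single        = hrtimers_cpu_dying,
        },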

kernel/time/hrtimer.c

Lines changed: 12 additions & 21 deletions
@@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
         }
 }

-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
 {
         struct hrtimer_cpu_base *old_base, *new_base;
-        int i;
+        int i, ncpu = cpumask_first(cpu_active_mask);

-        BUG_ON(cpu_online(scpu));
-        tick_cancel_sched_timer(scpu);
+        tick_cancel_sched_timer(dying_cpu);
+
+        old_base = this_cpu_ptr(&hrtimer_bases);
+        new_base = &per_cpu(hrtimer_bases, ncpu);

-        /*
-         * this BH disable ensures that raise_softirq_irqoff() does
-         * not wakeup ksoftirqd (and acquire the pi-lock) while
-         * holding the cpu_base lock
-         */
-        local_bh_disable();
-        local_irq_disable();
-        old_base = &per_cpu(hrtimer_bases, scpu);
-        new_base = this_cpu_ptr(&hrtimer_bases);
         /*
          * The caller is globally serialized and nobody else
          * takes two locks at once, deadlock is not possible.
          */
-        raw_spin_lock(&new_base->lock);
-        raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+        raw_spin_lock(&old_base->lock);
+        raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);

         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                 migrate_hrtimer_list(&old_base->clock_base[i],
@@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
          * The migration might have changed the first expiring softirq
          * timer on this CPU. Update it.
          */
-        hrtimer_update_softirq_timer(new_base, false);
+        __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+        /* Tell the other CPU to retrigger the next event */
+        smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);

-        raw_spin_unlock(&old_base->lock);
         raw_spin_unlock(&new_base->lock);
+        raw_spin_unlock(&old_base->lock);

-        /* Check, if we got expired work to do */
-        __hrtimer_peek_ahead_timers();
-        local_irq_enable();
-        local_bh_enable();
         return 0;
 }
