Skip to content

Commit f61eff8

Browse files
anna-marialx authored and KAGA-KOKO committed
hrtimer: Prepare support for PREEMPT_RT
When PREEMPT_RT is enabled, the soft interrupt thread can be preempted. If the soft interrupt thread is preempted in the middle of a timer callback, then calling hrtimer_cancel() can lead to two issues:

- If the caller is on a remote CPU then it has to spin wait for the timer handler to complete. This can result in unbound priority inversion.

- If the caller originates from the task which preempted the timer handler on the same CPU, then spin waiting for the timer handler to complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held around the execution of the timer callbacks. If hrtimer_cancel() detects that the timer callback is currently running, it blocks on the expiry lock. When the callback is finished, the expiry lock is dropped by the softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the livelock issues.

The same issue can happen in virtual machines when the vCPU which runs a timer callback is scheduled out. If a second vCPU of the same guest calls hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back in. The expiry lock mechanism would avoid that. It'd be trivial to enable this when paravirt spinlocks are enabled in a guest, but it's not clear whether this is an actual problem in the wild, so for now it's an RT only mechanism.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <[email protected]> Signed-off-by: Sebastian Andrzej Siewior <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Acked-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 1842f5a commit f61eff8

File tree

2 files changed

+105
-6
lines changed

2 files changed

+105
-6
lines changed

include/linux/hrtimer.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,10 @@ enum hrtimer_base_type {
192192
* @nr_retries: Total number of hrtimer interrupt retries
193193
* @nr_hangs: Total number of hrtimer interrupt hangs
194194
* @max_hang_time: Maximum time spent in hrtimer_interrupt
195+
* @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
196+
* expired
197+
* @timer_waiters: A hrtimer_cancel() invocation waits for the timer
198+
* callback to finish.
195199
* @expires_next: absolute time of the next event, is required for remote
196200
* hrtimer enqueue; it is the total first expiry time (hard
197201
* and soft hrtimer are taken into account)
@@ -218,6 +222,10 @@ struct hrtimer_cpu_base {
218222
unsigned short nr_retries;
219223
unsigned short nr_hangs;
220224
unsigned int max_hang_time;
225+
#endif
226+
#ifdef CONFIG_PREEMPT_RT
227+
spinlock_t softirq_expiry_lock;
228+
atomic_t timer_waiters;
221229
#endif
222230
ktime_t expires_next;
223231
struct hrtimer *next_timer;
@@ -350,6 +358,14 @@ extern void hrtimers_resume(void);
350358

351359
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
352360

361+
#ifdef CONFIG_PREEMPT_RT
/* Block until a running softirq-based timer callback has finished. */
void hrtimer_cancel_wait_running(const struct hrtimer *timer);
#else
/*
 * On !PREEMPT_RT kernels timer callbacks run in hard interrupt context
 * and cannot be preempted, so the canceller only needs to relax the CPU
 * and retry; the handler is guaranteed to make progress.
 *
 * NOTE(review): parameter made const to match the PREEMPT_RT
 * declaration above — callers are unaffected.
 */
static inline void hrtimer_cancel_wait_running(const struct hrtimer *timer)
{
	cpu_relax();
}
#endif
353369

354370
/* Exported timer functions: */
355371

kernel/time/hrtimer.c

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,82 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
11621162
}
11631163
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
11641164

1165+
#ifdef CONFIG_PREEMPT_RT
/* Initialize the per-CPU lock held across softirq timer expiry. */
static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
{
	spin_lock_init(&base->softirq_expiry_lock);
}

/* Taken by the softirq thread for the duration of callback execution. */
static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
{
	spin_lock(&base->softirq_expiry_lock);
}

static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
{
	spin_unlock(&base->softirq_expiry_lock);
}

/*
 * The counterpart to hrtimer_cancel_wait_running().
 *
 * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
 * the timer callback to finish. Drop expiry_lock and reacquire it. That
 * allows the waiter to acquire the lock and make progress.
 */
static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
				      unsigned long flags)
{
	if (atomic_read(&cpu_base->timer_waiters)) {
		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
		spin_unlock(&cpu_base->softirq_expiry_lock);
		spin_lock(&cpu_base->softirq_expiry_lock);
		raw_spin_lock_irq(&cpu_base->lock);
	}
}

/*
 * This function is called on PREEMPT_RT kernels when the fast path
 * deletion of a timer failed because the timer callback function was
 * running.
 *
 * This prevents priority inversion, if the softirq thread on a remote CPU
 * got preempted, and it prevents a livelock when the task which tries to
 * delete a timer preempted the softirq thread running the timer callback
 * function.
 */
void hrtimer_cancel_wait_running(const struct hrtimer *timer)
{
	struct hrtimer_clock_base *base = timer->base;

	/* Hard-interrupt expiry cannot be preempted: spinning is safe. */
	if (!timer->is_soft || !base || !base->cpu_base) {
		cpu_relax();
		return;
	}

	/*
	 * Mark the base as contended and grab the expiry lock, which is
	 * held by the softirq across the timer callback. Drop the lock
	 * immediately so the softirq can expire the next timer. In theory
	 * the timer could already be running again, but that's more than
	 * unlikely and just causes another wait loop.
	 */
	atomic_inc(&base->cpu_base->timer_waiters);
	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
	atomic_dec(&base->cpu_base->timer_waiters);
	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
}
#else
static inline void
hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
static inline void
hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
static inline void
hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
					     unsigned long flags) { }
#endif
1240+
11651241
/**
11661242
* hrtimer_cancel - cancel a timer and wait for the handler to finish.
11671243
* @timer: the timer to be cancelled
@@ -1172,13 +1248,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
11721248
*/
11731249
int hrtimer_cancel(struct hrtimer *timer)
{
	int ret;

	do {
		ret = hrtimer_try_to_cancel(timer);

		/*
		 * A negative return means the callback is currently
		 * executing. Instead of raw spin-waiting (which would
		 * livelock on PREEMPT_RT if this task preempted the
		 * softirq thread running the callback), block on the
		 * expiry lock until the handler has finished.
		 */
		if (ret < 0)
			hrtimer_cancel_wait_running(timer);
	} while (ret < 0);
	return ret;
}
11831261
EXPORT_SYMBOL_GPL(hrtimer_cancel);
11841262

@@ -1475,6 +1553,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
14751553
break;
14761554

14771555
__run_hrtimer(cpu_base, base, timer, &basenow, flags);
1556+
if (active_mask == HRTIMER_ACTIVE_SOFT)
1557+
hrtimer_sync_wait_running(cpu_base, flags);
14781558
}
14791559
}
14801560
}
@@ -1485,6 +1565,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
14851565
unsigned long flags;
14861566
ktime_t now;
14871567

1568+
hrtimer_cpu_base_lock_expiry(cpu_base);
14881569
raw_spin_lock_irqsave(&cpu_base->lock, flags);
14891570

14901571
now = hrtimer_update_base(cpu_base);
@@ -1494,6 +1575,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
14941575
hrtimer_update_softirq_timer(cpu_base, true);
14951576

14961577
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1578+
hrtimer_cpu_base_unlock_expiry(cpu_base);
14971579
}
14981580

14991581
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1897,6 +1979,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
18971979
cpu_base->softirq_next_timer = NULL;
18981980
cpu_base->expires_next = KTIME_MAX;
18991981
cpu_base->softirq_expires_next = KTIME_MAX;
1982+
hrtimer_cpu_base_init_expiry_lock(cpu_base);
19001983
return 0;
19011984
}
19021985

0 commit comments

Comments
 (0)