
Commit 9bc7491
hrtimer: Prevent stale expiry time in hrtimer_interrupt()
hrtimer_interrupt() has the following subtle issue:

    hrtimer_interrupt()
      lock(cpu_base);
      expires_next = KTIME_MAX;

      expire_timers(CLOCK_MONOTONIC);
      expires = get_next_timer(CLOCK_MONOTONIC);
      if (expires < expires_next)
        expires_next = expires;

      expire_timers(CLOCK_REALTIME);
                              unlock(cpu_base);
                              wakeup()
                              hrtimer_start(CLOCK_MONOTONIC, newtimer);
                              lock(cpu_base);
      expires = get_next_timer(CLOCK_REALTIME);
      if (expires < expires_next)
        expires_next = expires;

So because we already evaluated the next expiring timer of
CLOCK_MONOTONIC, we ignore that the expiry time of newtimer might be
earlier than the overall next expiry time computed in
hrtimer_interrupt().

To solve this, remove the caching of the next expiry value from
hrtimer_interrupt() and reevaluate all active clock bases for the next
expiry value. To avoid more code duplication, create a shared
evaluation function and use it for hrtimer_get_next_event(),
hrtimer_force_reprogram() and hrtimer_interrupt().

There is another subtlety in this mechanism: while hrtimer_interrupt()
is running, we want to avoid touching the hardware device because we
will reprogram it anyway at the end of hrtimer_interrupt(). This works
nicely for hrtimers which get rearmed via the HRTIMER_RESTART
mechanism, because we drop out while the callback on that CPU is
running. But it fails if a new timer gets enqueued as in the example
above.

This has another implication: while hrtimer_interrupt() is running, we
refuse remote enqueueing of timers - see hrtimer_interrupt() and
hrtimer_check_target(). hrtimer_interrupt() tries to prevent this by
setting cpu_base->expires_next to KTIME_MAX, but that fails if a new
timer gets queued.

Prevent both the hardware access and the remote enqueue explicitly. We
can loosen the restriction on the remote enqueue now due to the
reevaluation of the next expiry value, but that needs a separate patch.

Folded in a fix from Vignesh Radhakrishnan.

Reported-and-tested-by: Stanislav Fomichev <[email protected]>
Based-on-patch-by: Stanislav Fomichev <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1501202049190.5526@nanos
Signed-off-by: Thomas Gleixner <[email protected]>
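The race and the fix are easier to see in miniature. The following
standalone C program is a sketch of the pattern only, not kernel code:
base_next[], next_event(), enqueue() and in_irq are invented stand-ins
for the per-clock-base timer queues, __hrtimer_get_next_event(), the
hrtimer_start()/hrtimer_reprogram() path and cpu_base->in_hrtirq.

    #include <stdint.h>
    #include <stdio.h>

    #define NR_BASES 2
    #define KMAX INT64_MAX

    static int64_t base_next[NR_BASES]; /* earliest pending expiry per base */
    static int64_t expires_next;        /* value the event device is set to */
    static int in_irq;                  /* models cpu_base->in_hrtirq */

    /* One pass over all bases, analogous to __hrtimer_get_next_event(). */
    static int64_t next_event(void)
    {
            int64_t min = KMAX;

            for (int i = 0; i < NR_BASES; i++)
                    if (base_next[i] < min)
                            min = base_next[i];
            return min;
    }

    /* Enqueue path, analogous to the in_hrtirq check in hrtimer_reprogram(). */
    static void enqueue(int base, int64_t expires)
    {
            if (expires < base_next[base])
                    base_next[base] = expires;
            if (!in_irq && expires < expires_next)
                    expires_next = expires; /* would reprogram the device */
    }

    int main(void)
    {
            base_next[0] = 100;
            base_next[1] = 200;

            in_irq = 1;           /* hrtimer_interrupt() entry */
            base_next[0] = KMAX;  /* base 0's timer expires ...           */
            enqueue(0, 150);      /* ... and its wakeup queues a new one  */
            base_next[1] = KMAX;  /* base 1's timer expires               */

            /* Fresh minimum over all bases sees the timer queued above. */
            expires_next = next_event();
            in_irq = 0;

            printf("device programmed to %lld\n", (long long)expires_next);
            return 0; /* prints 150 */
    }

With the old per-base caching, base 0's minimum would have been folded
into expires_next before the wakeup enqueued the 150 timer, so the
device would have been programmed to KTIME_MAX and the new timer would
have gone stale until some other event fired.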
1 parent 41fbf3b commit 9bc7491

File tree

2 files changed: +52 −58 lines

include/linux/hrtimer.h

Lines changed: 2 additions & 0 deletions
@@ -170,6 +170,7 @@ enum hrtimer_base_type {
  * @clock_was_set:   Indicates that clock was set from irq context.
  * @expires_next:    absolute time of the next event which was scheduled
  *                   via clock_set_next_event()
+ * @in_hrtirq:       hrtimer_interrupt() is currently executing
  * @hres_active:     State of high resolution mode
  * @hang_detected:   The last hrtimer interrupt detected a hang
  * @nr_events:       Total number of hrtimer interrupt events
@@ -185,6 +186,7 @@ struct hrtimer_cpu_base {
        unsigned int                    clock_was_set;
 #ifdef CONFIG_HIGH_RES_TIMERS
        ktime_t                         expires_next;
+       int                             in_hrtirq;
        int                             hres_active;
        int                             hang_detected;
        unsigned long                   nr_events;

kernel/time/hrtimer.c

Lines changed: 50 additions & 58 deletions
@@ -440,6 +440,37 @@ static inline void debug_deactivate(struct hrtimer *timer)
        trace_hrtimer_cancel(timer);
 }
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+{
+       struct hrtimer_clock_base *base = cpu_base->clock_base;
+       ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
+       int i;
+
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+               struct timerqueue_node *next;
+               struct hrtimer *timer;
+
+               next = timerqueue_getnext(&base->active);
+               if (!next)
+                       continue;
+
+               timer = container_of(next, struct hrtimer, node);
+               expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
+               if (expires.tv64 < expires_next.tv64)
+                       expires_next = expires;
+       }
+       /*
+        * clock_was_set() might have changed base->offset of any of
+        * the clock bases so the result might be negative. Fix it up
+        * to prevent a false positive in clockevents_program_event().
+        */
+       if (expires_next.tv64 < 0)
+               expires_next.tv64 = 0;
+       return expires_next;
+}
+#endif
+
 /* High resolution timer related functions */
 #ifdef CONFIG_HIGH_RES_TIMERS

@@ -488,32 +519,7 @@ static inline int hrtimer_hres_active(void)
 static void
 hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 {
-       int i;
-       struct hrtimer_clock_base *base = cpu_base->clock_base;
-       ktime_t expires, expires_next;
-
-       expires_next.tv64 = KTIME_MAX;
-
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
-               struct hrtimer *timer;
-               struct timerqueue_node *next;
-
-               next = timerqueue_getnext(&base->active);
-               if (!next)
-                       continue;
-               timer = container_of(next, struct hrtimer, node);
-
-               expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-               /*
-                * clock_was_set() has changed base->offset so the
-                * result might be negative. Fix it up to prevent a
-                * false positive in clockevents_program_event()
-                */
-               if (expires.tv64 < 0)
-                       expires.tv64 = 0;
-               if (expires.tv64 < expires_next.tv64)
-                       expires_next = expires;
-       }
+       ktime_t expires_next = __hrtimer_get_next_event(cpu_base);
 
        if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
                return;
@@ -586,6 +592,15 @@ static int hrtimer_reprogram(struct hrtimer *timer,
        if (expires.tv64 >= cpu_base->expires_next.tv64)
                return 0;
 
+       /*
+        * When the target cpu of the timer is currently executing
+        * hrtimer_interrupt(), then we do not touch the clock event
+        * device. hrtimer_interrupt() will reevaluate all clock bases
+        * before reprogramming the device.
+        */
+       if (cpu_base->in_hrtirq)
+               return 0;
+
        /*
         * If a hang was detected in the last timer interrupt then we
         * do not schedule a timer which is earlier than the expiry
@@ -1104,29 +1119,14 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 ktime_t hrtimer_get_next_event(void)
 {
        struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
-       struct hrtimer_clock_base *base = cpu_base->clock_base;
-       ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
+       ktime_t mindelta = { .tv64 = KTIME_MAX };
        unsigned long flags;
-       int i;
 
        raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
-       if (!hrtimer_hres_active()) {
-               for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
-                       struct hrtimer *timer;
-                       struct timerqueue_node *next;
-
-                       next = timerqueue_getnext(&base->active);
-                       if (!next)
-                               continue;
-
-                       timer = container_of(next, struct hrtimer, node);
-                       delta.tv64 = hrtimer_get_expires_tv64(timer);
-                       delta = ktime_sub(delta, base->get_time());
-                       if (delta.tv64 < mindelta.tv64)
-                               mindelta.tv64 = delta.tv64;
-               }
-       }
+       if (!hrtimer_hres_active())
+               mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base),
+                                    ktime_get());
 
        raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -1253,7 +1253,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
        raw_spin_lock(&cpu_base->lock);
        entry_time = now = hrtimer_update_base(cpu_base);
 retry:
-       expires_next.tv64 = KTIME_MAX;
+       cpu_base->in_hrtirq = 1;
        /*
         * We set expires_next to KTIME_MAX here with cpu_base->lock
         * held to prevent that a timer is enqueued in our queue via
@@ -1291,28 +1291,20 @@ void hrtimer_interrupt(struct clock_event_device *dev)
                         * are right-of a not yet expired timer, because that
                         * timer will have to trigger a wakeup anyway.
                         */
-
-                       if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
-                               ktime_t expires;
-
-                               expires = ktime_sub(hrtimer_get_expires(timer),
-                                                   base->offset);
-                               if (expires.tv64 < 0)
-                                       expires.tv64 = KTIME_MAX;
-                               if (expires.tv64 < expires_next.tv64)
-                                       expires_next = expires;
+                       if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
                                break;
-                       }
 
                        __run_hrtimer(timer, &basenow);
                }
        }
-
+       /* Reevaluate the clock bases for the next expiry */
+       expires_next = __hrtimer_get_next_event(cpu_base);
        /*
         * Store the new expiry value so the migration code can verify
         * against it.
         */
        cpu_base->expires_next = expires_next;
+       cpu_base->in_hrtirq = 0;
        raw_spin_unlock(&cpu_base->lock);
 
        /* Reprogramming necessary ? */
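Taken together, the hunks above give hrtimer_interrupt() the following
shape; this is a condensed paraphrase of the result, not the literal
source, with hang detection and the retry path elided:

        raw_spin_lock(&cpu_base->lock);
        cpu_base->in_hrtirq = 1;
        /* ... expire all due timers on all active clock bases ... */
        /* Reevaluate the clock bases for the next expiry */
        expires_next = __hrtimer_get_next_event(cpu_base);
        cpu_base->expires_next = expires_next;
        cpu_base->in_hrtirq = 0;
        raw_spin_unlock(&cpu_base->lock);
        /* ... reprogram the clock event device for expires_next ... */

Because the reevaluation happens under the same cpu_base->lock that the
enqueue path takes, any timer enqueued while the interrupt runs is
visible to __hrtimer_get_next_event() before the device is reprogrammed.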
