
Commit a683f39

KAGA-KOKO (Thomas Gleixner) authored and Ingo Molnar committed
timers: Forward the wheel clock whenever possible
The wheel clock is stale when a CPU goes into a long idle sleep. This has the
side effect that timers which are queued end up in the outer wheel levels.
That results in coarser granularity.

To solve this, we keep track of the idle state and forward the wheel clock
whenever possible.

Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Arjan van de Ven <[email protected]>
Cc: Chris Mason <[email protected]>
Cc: Eric Dumazet <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: George Spelvin <[email protected]>
Cc: Josh Triplett <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent ff00673 commit a683f39
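
For illustration, the following is a minimal standalone C model of the idea (not kernel code: the struct, the model_* helpers and the wheel-level math are simplified stand-ins invented for this sketch). When the per-CPU timer base has been idle, its clock is forwarded toward the current time before a newly queued timer's delta is computed, so the timer lands in an inner wheel level instead of an outer, coarser one.

/*
 * Standalone model of the wheel clock forwarding idea. Field names mirror
 * the patch (clk, next_expiry, is_idle); everything else is invented for
 * illustration and is not how the kernel computes wheel levels.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_base {
	unsigned long clk;		/* last point the wheel was processed */
	unsigned long next_expiry;	/* earliest pending expiry */
	bool is_idle;			/* set when the CPU went into a long sleep */
};

/* Simplified stand-in for the kernel's time_after() jiffies comparison. */
static bool model_time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/* Mirrors the logic of forward_timer_base(): only forward an idle, lagging base. */
static void model_forward_base(struct model_base *base, unsigned long now)
{
	if (!base->is_idle || (long)(now - base->clk) < 2)
		return;

	if (model_time_after(base->next_expiry, now))
		base->clk = now;		/* nothing due yet: jump to now */
	else
		base->clk = base->next_expiry;	/* don't step over a pending expiry */
}

/* Very rough wheel-level picker: the larger the delta, the coarser the level. */
static int model_wheel_level(unsigned long delta)
{
	int lvl = 0;

	while (delta >= 64) {	/* pretend: 64 slots per level, 8x coarser each level */
		delta /= 8;
		lvl++;
	}
	return lvl;
}

int main(void)
{
	struct model_base base = { .clk = 1000, .next_expiry = 1000000, .is_idle = true };
	unsigned long now = 10000;		/* the CPU slept for ~9000 ticks */
	unsigned long expires = now + 10;	/* new timer, 10 ticks from now */

	/* Stale base clock: the delta is huge, so the timer lands in an outer level. */
	printf("stale clock:     level %d\n", model_wheel_level(expires - base.clk));

	/* Forwarded base clock (what the patch does on enqueue / idle exit). */
	model_forward_base(&base, now);
	printf("forwarded clock: level %d\n", model_wheel_level(expires - base.clk));
	return 0;
}

Built with any C compiler, the stale case places the 10-tick timer in a coarse outer level, while the forwarded case keeps it in level 0, which is the granularity loss this patch avoids.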

File tree

3 files changed: 120 additions (+), 21 deletions (-)


kernel/time/tick-internal.h

Lines changed: 1 addition & 0 deletions
@@ -164,3 +164,4 @@ static inline void timers_update_migration(bool update_nohz) { }
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 
 extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+void timer_clear_idle(void);

kernel/time/tick-sched.c

Lines changed: 12 additions & 0 deletions
@@ -700,6 +700,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	delta = next_tick - basemono;
 	if (delta <= (u64)TICK_NSEC) {
 		tick.tv64 = 0;
+
+		/*
+		 * Tell the timer code that the base is not idle, i.e. undo
+		 * the effect of get_next_timer_interrupt():
+		 */
+		timer_clear_idle();
 		/*
 		 * We've not stopped the tick yet, and there's a timer in the
 		 * next period, so no point in stopping it either, bail.
@@ -809,6 +815,12 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	tick_do_update_jiffies64(now);
 	cpu_load_update_nohz_stop();
 
+	/*
+	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
+	 * the clock forward checks in the enqueue path:
+	 */
+	timer_clear_idle();
+
 	calc_load_exit_idle();
 	touch_softlockup_watchdog_sched();
 	/*

kernel/time/timer.c

Lines changed: 107 additions & 21 deletions
@@ -196,9 +196,11 @@ struct timer_base {
 	spinlock_t		lock;
 	struct timer_list	*running_timer;
 	unsigned long		clk;
+	unsigned long		next_expiry;
 	unsigned int		cpu;
 	bool			migration_enabled;
 	bool			nohz_active;
+	bool			is_idle;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head	vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -519,24 +521,37 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
 {
 	__internal_add_timer(base, timer);
 
+	if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+		return;
+
 	/*
-	 * Check whether the other CPU is in dynticks mode and needs
-	 * to be triggered to reevaluate the timer wheel. We are
-	 * protected against the other CPU fiddling with the timer by
-	 * holding the timer base lock. This also makes sure that a
-	 * CPU on the way to stop its tick can not evaluate the timer
-	 * wheel.
-	 *
-	 * Spare the IPI for deferrable timers on idle targets though.
-	 * The next busy ticks will take care of it. Except full dynticks
-	 * require special care against races with idle_cpu(), lets deal
-	 * with that later.
+	 * TODO: This wants some optimizing similar to the code below, but we
+	 * will do that when we switch from push to pull for deferrable timers.
 	 */
-	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) {
-		if (!(timer->flags & TIMER_DEFERRABLE) ||
-		    tick_nohz_full_cpu(base->cpu))
+	if (timer->flags & TIMER_DEFERRABLE) {
+		if (tick_nohz_full_cpu(base->cpu))
 			wake_up_nohz_cpu(base->cpu);
+		return;
 	}
+
+	/*
+	 * We might have to IPI the remote CPU if the base is idle and the
+	 * timer is not deferrable. If the other CPU is on the way to idle
+	 * then it can't set base->is_idle as we hold the base lock:
+	 */
+	if (!base->is_idle)
+		return;
+
+	/* Check whether this is the new first expiring timer: */
+	if (time_after_eq(timer->expires, base->next_expiry))
+		return;
+
+	/*
+	 * Set the next expiry time and kick the CPU so it can reevaluate the
+	 * wheel:
+	 */
+	base->next_expiry = timer->expires;
+	wake_up_nohz_cpu(base->cpu);
 }
 
 #ifdef CONFIG_TIMER_STATS
@@ -844,10 +859,11 @@ static inline struct timer_base *get_timer_base(u32 tflags)
 	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
 }
 
-static inline struct timer_base *get_target_base(struct timer_base *base,
-						 unsigned tflags)
+#ifdef CONFIG_NO_HZ_COMMON
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
 {
-#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 	if ((tflags & TIMER_PINNED) || !base->migration_enabled)
 		return get_timer_this_cpu_base(tflags);
 	return get_timer_cpu_base(tflags, get_nohz_timer_target());
@@ -856,6 +872,43 @@ static inline struct timer_base *get_target_base(struct timer_base *base,
 #endif
 }
 
+static inline void forward_timer_base(struct timer_base *base)
+{
+	/*
+	 * We only forward the base when it's idle and we have a delta between
+	 * base clock and jiffies.
+	 */
+	if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+		return;
+
+	/*
+	 * If the next expiry value is > jiffies, then we fast forward to
+	 * jiffies otherwise we forward to the next expiry value.
+	 */
+	if (time_after(base->next_expiry, jiffies))
+		base->clk = jiffies;
+	else
+		base->clk = base->next_expiry;
+}
+#else
+static inline struct timer_base *
+__get_target_base(struct timer_base *base, unsigned tflags)
+{
+	return get_timer_this_cpu_base(tflags);
+}
+
+static inline void forward_timer_base(struct timer_base *base) { }
+#endif
+
+static inline struct timer_base *
+get_target_base(struct timer_base *base, unsigned tflags)
+{
+	struct timer_base *target = __get_target_base(base, tflags);
+
+	forward_timer_base(target);
+	return target;
+}
+
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
  * that all timers which are tied to this base are locked, and the base itself
@@ -1417,16 +1470,49 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 
 	spin_lock(&base->lock);
 	nextevt = __next_timer_interrupt(base);
-	spin_unlock(&base->lock);
+	base->next_expiry = nextevt;
+	/*
+	 * We have a fresh next event. Check whether we can forward the base:
+	 */
+	if (time_after(nextevt, jiffies))
+		base->clk = jiffies;
+	else if (time_after(nextevt, base->clk))
+		base->clk = nextevt;
 
-	if (time_before_eq(nextevt, basej))
+	if (time_before_eq(nextevt, basej)) {
 		expires = basem;
-	else
+		base->is_idle = false;
+	} else {
 		expires = basem + (nextevt - basej) * TICK_NSEC;
+		/*
+		 * If we expect to sleep more than a tick, mark the base idle:
+		 */
+		if ((expires - basem) > TICK_NSEC)
+			base->is_idle = true;
+	}
+	spin_unlock(&base->lock);
 
 	return cmp_next_hrtimer_event(basem, expires);
 }
 
+/**
+ * timer_clear_idle - Clear the idle state of the timer base
+ *
+ * Called with interrupts disabled
+ */
+void timer_clear_idle(void)
+{
+	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+
+	/*
+	 * We do this unlocked. The worst outcome is a remote enqueue sending
+	 * a pointless IPI, but taking the lock would just make the window for
+	 * sending the IPI a few instructions smaller for the cost of taking
+	 * the lock in the exit from idle path.
+	 */
+	base->is_idle = false;
+}
+
 static int collect_expired_timers(struct timer_base *base,
 				  struct hlist_head *heads)
 {
@@ -1440,7 +1526,7 @@ static int collect_expired_timers(struct timer_base *base,
 
 	/*
 	 * If the next timer is ahead of time forward to current
-	 * jiffies, otherwise forward to the next expiry time.
+	 * jiffies, otherwise forward to the next expiry time:
 	 */
 	if (time_after(next, jiffies)) {
 		/* The call site will increment clock! */
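
As a companion sketch for the internal_add_timer() hunk above, here is a standalone model (again not kernel code; the struct and helper names are invented) of the enqueue-side decision: a remote CPU is only kicked when its base is marked idle and the new timer becomes the earliest pending expiry, which is what keeps pointless IPIs off the common path.

/* Standalone model of the "do we need to IPI the idle CPU?" check on enqueue. */
#include <stdbool.h>
#include <stdio.h>

struct model_base {
	unsigned long next_expiry;	/* earliest pending expiry on that CPU */
	bool is_idle;			/* set by get_next_timer_interrupt() */
};

/* Returns true when the remote CPU would be woken to reevaluate its wheel. */
static bool model_enqueue_needs_ipi(struct model_base *base, unsigned long expires)
{
	if (!base->is_idle)
		return false;		/* a busy CPU will see the timer on its next tick */

	if ((long)(expires - base->next_expiry) >= 0)
		return false;		/* not the new first expiring timer */

	base->next_expiry = expires;	/* remember the earlier expiry ... */
	return true;			/* ... and kick the CPU */
}

int main(void)
{
	struct model_base base = { .next_expiry = 5000, .is_idle = true };

	printf("later timer:   %s\n", model_enqueue_needs_ipi(&base, 6000) ? "IPI" : "no IPI");
	printf("earlier timer: %s\n", model_enqueue_needs_ipi(&base, 4000) ? "IPI" : "no IPI");

	base.is_idle = false;
	printf("base not idle: %s\n", model_enqueue_needs_ipi(&base, 100) ? "IPI" : "no IPI");
	return 0;
}

The deferrable-timer special case and the base lock that the real code relies on are deliberately left out; the point is only the is_idle / next_expiry check that this patch introduces.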
