Skip to content

Commit fb02fbc

Browse files
KAGA-KOKOThomas Gleixner
authored andcommitted
NOHZ: restart tick device from irq_enter()
We did not restart the tick device from irq_enter() to avoid double reprogramming and extra events in the "return immediately to idle" case.

But long lasting softirqs can lead to a situation where jiffies become stale:

idle()
  tick stopped (reprogrammed to next pending timer)
  halt()
    interrupt
      jiffies updated from irq_enter()
      interrupt handler
      softirq function 1 runs 20ms
      softirq function 2 arms a 10ms timer with a stale jiffies value
      jiffies updated from irq_exit()
      timer wheel has now an already expired timer (the one added in function 2)
      timer fires and timer softirq runs

This was discovered when debugging a timer problem which happened only when the ath5k driver is active. The debugging proved that there is a softirq function running for more than 20ms, which is a bug by itself.

To solve this we restart the tick timer right from irq_enter(), but do not go through the other functions which are necessary to return from idle when need_resched() is set.

Reported-by: Elias Oltmanns <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Elias Oltmanns <[email protected]>
1 parent c34bec5 commit fb02fbc

File tree

3 files changed

+38
-8
lines changed

3 files changed

+38
-8
lines changed

kernel/time/tick-broadcast.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
383383
return 0;
384384
}
385385

386+
/*
387+
* Called from irq_enter() when idle was interrupted to reenable the
388+
* per cpu device.
389+
*/
390+
void tick_check_oneshot_broadcast(int cpu)
391+
{
392+
/* Act only when this cpu is currently set in tick_broadcast_oneshot_mask. */
if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
393+
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
394+
395+
/*
 * Switch this cpu's own tick event device to oneshot mode.
 * Note: the cpu's bit in tick_broadcast_oneshot_mask is not
 * modified by this function.
 */
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
396+
}
397+
}
398+
386399
/*
387400
* Handle oneshot mode broadcasting
388401
*/

kernel/time/tick-internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
3636
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
3737
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
3838
extern int tick_broadcast_oneshot_active(void);
39+
extern void tick_check_oneshot_broadcast(int cpu);
3940
# else /* BROADCAST */
4041
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
4142
{
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
4546
static inline void tick_broadcast_switch_to_oneshot(void) { }
4647
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
4748
static inline int tick_broadcast_oneshot_active(void) { return 0; }
49+
/* Empty inline variant for the !BROADCAST branch: calling it does nothing. */
static inline void tick_check_oneshot_broadcast(int cpu) { }
4850
# endif /* !BROADCAST */
4951

5052
#else /* !ONESHOT */

kernel/time/tick-sched.c

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -508,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev)
508508
update_process_times(user_mode(regs));
509509
profile_tick(CPU_PROFILING);
510510

511-
/* Do not restart, when we are in the idle loop */
512-
if (ts->tick_stopped)
513-
return;
514-
515511
while (tick_nohz_reprogram(ts, now)) {
516512
now = ktime_get();
517513
tick_do_update_jiffies64(now);
@@ -557,6 +553,27 @@ static void tick_nohz_switch_to_nohz(void)
557553
smp_processor_id());
558554
}
559555

556+
/*
557+
* When NOHZ is enabled and the tick is stopped, we need to kick the
558+
* tick timer from irq_enter() so that the jiffies update is kept
559+
* alive during long running softirqs. That's ugly as hell, but
560+
* correctness is key even if we need to fix the offending softirq in
561+
* the first place.
562+
*
563+
* Note, this is different to tick_nohz_restart. We just kick the
564+
* timer and do not touch the other magic bits which need to be done
565+
* when idle is left.
566+
*/
567+
static void tick_nohz_kick_tick(int cpu)
568+
{
569+
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
570+
571+
/* Nothing to do unless the tick is marked as stopped on this cpu. */
if (!ts->tick_stopped)
572+
return;
573+
574+
/*
 * Restart the tick using the current time from ktime_get().
 * This function itself does not write ts->tick_stopped.
 */
tick_nohz_restart(ts, ktime_get());
575+
}
576+
560577
#else
561578

562579
static inline void tick_nohz_switch_to_nohz(void) { }
@@ -568,9 +585,11 @@ static inline void tick_nohz_switch_to_nohz(void) { }
568585
*/
569586
void tick_check_idle(int cpu)
570587
{
588+
/* Placed outside the #ifdef, so it is called with or without CONFIG_NO_HZ. */
tick_check_oneshot_broadcast(cpu);
571589
#ifdef CONFIG_NO_HZ
572590
tick_nohz_stop_idle(cpu);
573591
tick_nohz_update_jiffies();
592+
/* Called last, after the jiffies update above. */
tick_nohz_kick_tick(cpu);
574593
#endif
575594
}
576595

@@ -627,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
627646
profile_tick(CPU_PROFILING);
628647
}
629648

630-
/* Do not restart, when we are in the idle loop */
631-
if (ts->tick_stopped)
632-
return HRTIMER_NORESTART;
633-
634649
hrtimer_forward(timer, now, tick_period);
635650

636651
return HRTIMER_RESTART;

0 commit comments

Comments
 (0)