Skip to content

Commit 08ae95f

Browse files
npiggin authored and Ingo Molnar committed
nohz_full: Allow the boot CPU to be nohz_full
Allow the boot CPU/CPU0 to be nohz_full. Have the boot CPU take the do_timer duty during boot until a housekeeping CPU can take over.

This is supported when CONFIG_PM_SLEEP_SMP is not configured, or when it is configured and the arch allows suspend on non-zero CPUs.

nohz_full has been trialed at a large supercomputer site and found to significantly reduce jitter. In order to deploy it in production, they need CPU0 to be nohz_full because their job control system requires the application CPUs to start from 0, and the housekeeping CPUs are placed higher. An equivalent job scheduling that uses CPU0 for housekeeping could be achieved by modifying their system, but it is preferable if nohz_full can support their environment without modification.

Signed-off-by: Nicholas Piggin <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rafael J . Wysocki <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 9219565 commit 08ae95f

File tree

2 files changed

+70
-14
lines changed

2 files changed

+70
-14
lines changed

kernel/time/tick-common.c

Lines changed: 46 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,14 @@ ktime_t tick_period;
4646
* procedure also covers cpu hotplug.
4747
*/
4848
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
49+
#ifdef CONFIG_NO_HZ_FULL
50+
/*
51+
* tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
52+
* tick_do_timer_cpu and it should be taken over by an eligible secondary
53+
* when one comes online.
54+
*/
55+
static int tick_do_timer_boot_cpu __read_mostly = -1;
56+
#endif
4957

5058
/*
5159
* Debugging: see timer_list.c
@@ -167,6 +175,26 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
167175
}
168176
}
169177

178+
#ifdef CONFIG_NO_HZ_FULL
179+
static void giveup_do_timer(void *info)
180+
{
181+
int cpu = *(unsigned int *)info;
182+
183+
WARN_ON(tick_do_timer_cpu != smp_processor_id());
184+
185+
tick_do_timer_cpu = cpu;
186+
}
187+
188+
static void tick_take_do_timer_from_boot(void)
189+
{
190+
int cpu = smp_processor_id();
191+
int from = tick_do_timer_boot_cpu;
192+
193+
if (from >= 0 && from != cpu)
194+
smp_call_function_single(from, giveup_do_timer, &cpu, 1);
195+
}
196+
#endif
197+
170198
/*
171199
* Setup the tick device
172200
*/
@@ -186,12 +214,26 @@ static void tick_setup_device(struct tick_device *td,
186214
* this cpu:
187215
*/
188216
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
189-
if (!tick_nohz_full_cpu(cpu))
190-
tick_do_timer_cpu = cpu;
191-
else
192-
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
217+
tick_do_timer_cpu = cpu;
218+
193219
tick_next_period = ktime_get();
194220
tick_period = NSEC_PER_SEC / HZ;
221+
#ifdef CONFIG_NO_HZ_FULL
222+
/*
223+
* The boot CPU may be nohz_full, in which case set
224+
* tick_do_timer_boot_cpu so the first housekeeping
225+
* secondary that comes up will take do_timer from
226+
* us.
227+
*/
228+
if (tick_nohz_full_cpu(cpu))
229+
tick_do_timer_boot_cpu = cpu;
230+
231+
} else if (tick_do_timer_boot_cpu != -1 &&
232+
!tick_nohz_full_cpu(cpu)) {
233+
tick_take_do_timer_from_boot();
234+
tick_do_timer_boot_cpu = -1;
235+
WARN_ON(tick_do_timer_cpu != cpu);
236+
#endif
195237
}
196238

197239
/*

kernel/time/tick-sched.c

Lines changed: 24 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -121,10 +121,16 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
121121
* into a long sleep. If two CPUs happen to assign themselves to
122122
* this duty, then the jiffies update is still serialized by
123123
* jiffies_lock.
124+
*
125+
* If nohz_full is enabled, this should not happen because the
126+
* tick_do_timer_cpu never relinquishes.
124127
*/
125-
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
126-
&& !tick_nohz_full_cpu(cpu))
128+
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
129+
#ifdef CONFIG_NO_HZ_FULL
130+
WARN_ON(tick_nohz_full_running);
131+
#endif
127132
tick_do_timer_cpu = cpu;
133+
}
128134
#endif
129135

130136
/* Check, if the jiffies need an update */
@@ -395,8 +401,8 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
395401
static int tick_nohz_cpu_down(unsigned int cpu)
396402
{
397403
/*
398-
* The boot CPU handles housekeeping duty (unbound timers,
399-
* workqueues, timekeeping, ...) on behalf of full dynticks
404+
* The tick_do_timer_cpu CPU handles housekeeping duty (unbound
405+
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
400406
* CPUs. It must remain online when nohz full is enabled.
401407
*/
402408
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
@@ -423,12 +429,15 @@ void __init tick_nohz_init(void)
423429
return;
424430
}
425431

426-
cpu = smp_processor_id();
432+
if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
433+
!IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
434+
cpu = smp_processor_id();
427435

428-
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
429-
pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
430-
cpu);
431-
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
436+
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
437+
pr_warn("NO_HZ: Clearing %d from nohz_full range "
438+
"for timekeeping\n", cpu);
439+
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
440+
}
432441
}
433442

434443
for_each_cpu(cpu, tick_nohz_full_mask)
@@ -904,8 +913,13 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
904913
/*
905914
* Boot safety: make sure the timekeeping duty has been
906915
* assigned before entering dyntick-idle mode,
916+
* tick_do_timer_cpu is TICK_DO_TIMER_BOOT
907917
*/
908-
if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
918+
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
919+
return false;
920+
921+
/* Should not happen for nohz-full */
922+
if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
909923
return false;
910924
}
911925

0 commit comments

Comments
 (0)