
Commit 1f25184

Author: Ingo Molnar (committed)
Merge branch 'timers/core-v9' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz
Pull nohz enhancements from Frederic Weisbecker:

 "Currently, in nohz full configs, the tick dependency is checked asynchronously by the nohz code from interrupt and context-switch paths, for each concerned subsystem, through a set of functions provided by those subsystems. Such functions are made of many conditions and details that can be heavyweight, as they are called on the fast path: sched_can_stop_tick(), posix_cpu_timer_can_stop_tick(), perf_event_can_stop_tick()...

 Thomas suggested a few months ago to make that tick dependency check synchronous. Instead of checking subsystem details from each interrupt to guess whether the tick can be stopped, every subsystem that may have a tick dependency sets a flag specifying the state of that dependency. This way we can verify whether the tick can be stopped with a single lightweight mask check on the fast path.

 This conversion from a pull model to a push model for the tick dependency is the core feature of this patchset, which is split into:

 * Nohz wide kick simplification
 * Improve nohz tracing
 * Introduce tick dependency mask
 * Migrate scheduler, posix timers, perf events and sched clock tick dependencies to the tick dependency mask."

Signed-off-by: Ingo Molnar <[email protected]>
2 parents: e2857b8 + 4f49b90 · commit 1f25184
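
To make the push model concrete: a subsystem records its tick dependency the moment the condition requiring the tick appears and drops it when that condition goes away, so the nohz core only has to test one mask. Below is a minimal sketch using the tick_dep_*() API added in include/linux/tick.h later in this diff; the example_* functions are hypothetical call sites, not code from the patches.

```c
#include <linux/tick.h>

/* Hypothetical call sites illustrating the push model. */
static void example_start_needing_tick(int cpu)
{
        /* A condition requiring the periodic tick has appeared on this CPU. */
        tick_dep_set_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
}

static void example_stop_needing_tick(int cpu)
{
        /* The condition is gone; the nohz core may stop the tick again. */
        tick_dep_clear_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
}
```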

File tree

14 files changed: +424 / -161 lines


include/linux/atomic.h

Lines changed: 21 additions & 0 deletions
@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+/**
+ * fetch_or - perform *ptr |= mask and return old value of *ptr
+ * @ptr: pointer to value
+ * @mask: mask to OR on the value
+ *
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#ifndef fetch_or
+#define fetch_or(ptr, mask) \
+({ typeof(*(ptr)) __old, __val = *(ptr); \
+        for (;;) { \
+                __old = cmpxchg((ptr), __val, __val | (mask)); \
+                if (__old == __val) \
+                        break; \
+                __val = __old; \
+        } \
+        __old; \
+})
+#endif
+
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
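
For orientation: fetch_or() atomically ORs a mask into a word and hands back the previous value, so a caller can set a bit and learn in the same step whether the word was previously empty, which is exactly the property a dependency mask wants. A kernel-style sketch of that usage pattern follows; example_dep_mask and example_set_dep() are hypothetical, not taken from this series.

```c
#include <linux/atomic.h>

static unsigned long example_dep_mask;

/* Returns true if this call set the very first bit in the mask. */
static bool example_set_dep(int bit)
{
        unsigned long prev;

        /* Atomically OR the bit in and fetch the old value of the mask. */
        prev = fetch_or(&example_dep_mask, 1UL << bit);

        return prev == 0;
}
```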

include/linux/perf_event.h

Lines changed: 0 additions & 6 deletions
@@ -1109,12 +1109,6 @@ static inline void perf_event_task_tick(void) { }
 static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
 #endif
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void) { return true; }
-#endif
-
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
 #else

include/linux/posix-timers.h

Lines changed: 0 additions & 3 deletions
@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
 void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
-
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
                            cputime_t *newval, cputime_t *oldval);

include/linux/sched.h

Lines changed: 8 additions & 3 deletions
@@ -719,6 +719,10 @@ struct signal_struct {
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
+
 	struct list_head cpu_timers[3];
 
 	struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
+
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time; /* monotonic time in nsec */
 	u64 real_start_time; /* boot based time in nsec */
@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
 extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
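
The two new tick_dep_mask fields give the dependency mask a per-process scope (signal_struct) and a per-task scope (task_struct); they are the storage behind the tick_dep_set_signal()/tick_dep_set_task() helpers declared in include/linux/tick.h below. A hedged sketch of choosing between the two scopes follows; example_timer_armed() is hypothetical.

```c
#include <linux/sched.h>
#include <linux/tick.h>

/* Hypothetical caller: keep the tick for one thread or for a whole thread group. */
static void example_timer_armed(struct task_struct *tsk, bool process_wide)
{
        if (process_wide)
                /* Flags the dependency in tsk->signal->tick_dep_mask. */
                tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
        else
                /* Flags the dependency in tsk->tick_dep_mask only. */
                tick_dep_set_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
```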

include/linux/tick.h

Lines changed: 93 additions & 4 deletions
@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
 	tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+enum tick_dep_bits {
+	TICK_DEP_BIT_POSIX_TIMER = 0,
+	TICK_DEP_BIT_PERF_EVENTS = 1,
+	TICK_DEP_BIT_SCHED = 2,
+	TICK_DEP_BIT_CLOCK_UNSTABLE = 3
+};
+
+#define TICK_DEP_MASK_NONE 0
+#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
+#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
+#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
+#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+
 #ifdef CONFIG_NO_HZ_COMMON
 extern int tick_nohz_enabled;
 extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
 	return cpumask_any_and(housekeeping_mask, cpu_online_mask);
 }
 
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_dep_set(enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_task(struct task_struct *tsk,
+				   enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit);
+
+/*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+ * on top of static keys.
+ */
+static inline void tick_dep_set(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set(bit);
+}
+
+static inline void tick_dep_clear(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear(bit);
+}
+
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_set_cpu(cpu, bit);
+}
+
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_clear_cpu(cpu, bit);
+}
+
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_task(tsk, bit);
+}
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_task(tsk, bit);
+}
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_signal(signal, bit);
+}
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_signal(signal, bit);
+}
+
 extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
 #else
 static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+static inline void tick_dep_set(enum tick_dep_bits bit) { }
+static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit) { }
+
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
-static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(void) { }
 #endif
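
The inline tick_dep_*() wrappers keep the off-cases cheap: they compile to empty stubs without CONFIG_NO_HZ_FULL, and otherwise filter through the tick_nohz_full_enabled()/tick_nohz_full_cpu() static-key checks before reaching the real tick_nohz_dep_*() functions. Besides the per-CPU, per-task and per-signal variants there is a system-wide form; here is a sketch of that case, with hypothetical example_* helpers and the unstable sched clock as the use case named in the merge message.

```c
#include <linux/tick.h>

/* Hypothetical: a dependency that applies to every nohz full CPU at once. */
static void example_clock_became_unstable(void)
{
        tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
}

static void example_clock_became_stable(void)
{
        tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
}
```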

include/trace/events/timer.h

Lines changed: 31 additions & 5 deletions
@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
 );
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+#define TICK_DEP_NAMES \
+		tick_dep_name(NONE) \
+		tick_dep_name(POSIX_TIMER) \
+		tick_dep_name(PERF_EVENTS) \
+		tick_dep_name(SCHED) \
+		tick_dep_name_end(CLOCK_UNSTABLE)
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+
+TICK_DEP_NAMES
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+
+#define show_tick_dep_name(val) \
+	__print_symbolic(val, TICK_DEP_NAMES)
+
 TRACE_EVENT(tick_stop,
 
-	TP_PROTO(int success, char *error_msg),
+	TP_PROTO(int success, int dependency),
 
-	TP_ARGS(success, error_msg),
+	TP_ARGS(success, dependency),
 
 	TP_STRUCT__entry(
 		__field( int , success )
-		__string( msg, error_msg )
+		__field( int , dependency )
 	),
 
 	TP_fast_assign(
 		__entry->success = success;
-		__assign_str(msg, error_msg);
+		__entry->dependency = dependency;
 	),
 
-	TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
+	TP_printk("success=%d dependency=%s", __entry->success, \
+			show_tick_dep_name(__entry->dependency))
 );
 #endif
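
With the dependency carried as an integer and decoded through __print_symbolic(), a refused tick stop now names the subsystem that blocked it: per the TP_printk() format above, the event payload renders along the lines of "success=0 dependency=PERF_EVENTS" instead of a free-form message string.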

kernel/events/core.c

Lines changed: 48 additions & 17 deletions
@@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 	return rotate;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-	if (atomic_read(&nr_freq_events) ||
-	    __this_cpu_read(perf_throttled_count))
-		return false;
-	else
-		return true;
-}
-#endif
-
 void perf_event_task_tick(void)
 {
 	struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
 
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+	tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 
 	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static DEFINE_SPINLOCK(nr_freq_lock);
+#endif
+
+static void unaccount_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	spin_lock(&nr_freq_lock);
+	if (atomic_dec_and_test(&nr_freq_events))
+		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void unaccount_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		unaccount_freq_event_nohz();
+	else
+		atomic_dec(&nr_freq_events);
+}
+
 static void unaccount_event(struct perf_event *event)
 {
 	bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
-		atomic_dec(&nr_freq_events);
+		unaccount_freq_event();
 	if (event->attr.context_switch) {
 		dec = true;
 		atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (unlikely(throttle
 		     && hwc->interrupts >= max_samples_per_tick)) {
 		__this_cpu_inc(perf_throttled_count);
+		tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 		hwc->interrupts = MAX_INTERRUPTS;
 		perf_log_throttle(event, 0);
-		tick_nohz_full_kick();
 		ret = 1;
 	}
 }
@@ -7816,6 +7828,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
 
+/* Freq events need the tick to stay alive (see perf_event_task_tick). */
+static void account_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	/* Lock so we don't race with concurrent unaccount */
+	spin_lock(&nr_freq_lock);
+	if (atomic_inc_return(&nr_freq_events) == 1)
+		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void account_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		account_freq_event_nohz();
+	else
+		atomic_inc(&nr_freq_events);
+}
+
+
 static void account_event(struct perf_event *event)
 {
 	bool inc = false;
@@ -7831,10 +7864,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
-	if (event->attr.freq) {
-		if (atomic_inc_return(&nr_freq_events) == 1)
-			tick_nohz_full_kick_all();
-	}
+	if (event->attr.freq)
+		account_freq_event();
 	if (event->attr.context_switch) {
 		atomic_inc(&nr_switch_events);
 		inc = true;
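
A note on the accounting pattern above: only the 0 to 1 transition of nr_freq_events sets TICK_DEP_BIT_PERF_EVENTS and only the final decrement clears it, while nr_freq_lock orders those transitions so that a racing unaccount cannot clear the dependency a concurrent first account has just set. The per-CPU throttling path uses the CPU-scoped variant instead: tick_dep_set_cpu() in __perf_event_overflow() replaces the old tick_nohz_full_kick(), and perf_event_task_tick() clears it again when it collects the throttled count on the next tick.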
