
Commit 4cfafd3

Peter Zijlstra authored and KAGA-KOKO committed
sched,perf: Fix periodic timers
In the below two commits (see Fixes) we have periodic timers that can stop themselves when they're no longer required, but need to be (re)started when their idle condition changes.

A further complication is that we want the timer handler to always do the forward such that it will always correctly deal with the overruns, and we do not want to race such that the handler has already decided to stop, but the (external) restart sees the timer still active and we end up with a 'lost' timer.

The problem with the current code is that the restart can come before the callback does the forward, at which point the forward from the callback will WARN about forwarding an enqueued timer.

Now, conceptually it's easy to detect if you're before or after the fwd by comparing the expiration time against the current time. Of course, that's expensive (and racy) because we don't have the current time.

Alternatively one could cache this state inside the timer, but then everybody pays the overhead of maintaining this extra state, and that is undesired.

The only other option that I could see is the external timer_active variable, which I tried to kill before. I would love a nicer interface for this seemingly simple 'problem' but alas.

Fixes: 272325c ("perf: Fix mux_interval hrtimer wreckage")
Fixes: 77a4d1a ("sched: Cleanup bandwidth timers")
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Sasha Levin <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
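The pattern the diffs below apply can be summarized in a short, self-contained sketch. This is not the kernel code from this commit (that follows in the diffs); it is a minimal illustration under assumed names (struct my_state, my_timer_fn, my_timer_restart, and the stub do_periodic_work are all hypothetical): the handler alone forwards the timer, under a lock, and clears an external active flag when it goes idle, while the restart path arms the timer only when that flag is clear.

#include <linux/hrtimer.h>
#include <linux/spinlock.h>

struct my_state {
	raw_spinlock_t		lock;		/* protects timer_active and the forward */
	struct hrtimer		timer;
	ktime_t			period;
	unsigned int		timer_active;	/* external 'timer_active' state */
};

/* Hypothetical periodic work; returns non-zero while more work remains. */
static int do_periodic_work(struct my_state *s)
{
	return 0;
}

static enum hrtimer_restart my_timer_fn(struct hrtimer *timer)
{
	struct my_state *s = container_of(timer, struct my_state, timer);
	int more_work = do_periodic_work(s);

	raw_spin_lock(&s->lock);
	if (more_work)
		hrtimer_forward_now(timer, s->period);	/* handler owns the forward, keeps overruns */
	else
		s->timer_active = 0;			/* advertise that the timer stopped itself */
	raw_spin_unlock(&s->lock);

	return more_work ? HRTIMER_RESTART : HRTIMER_NORESTART;
}

static void my_timer_init(struct my_state *s, ktime_t period)
{
	raw_spin_lock_init(&s->lock);
	s->period = period;
	s->timer_active = 0;
	hrtimer_init(&s->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	s->timer.function = my_timer_fn;
}

/* External restart: only arm the timer if the handler has stopped it. */
static void my_timer_restart(struct my_state *s)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&s->lock, flags);
	if (!s->timer_active) {
		s->timer_active = 1;
		hrtimer_forward_now(&s->timer, s->period);
		hrtimer_start_expires(&s->timer, HRTIMER_MODE_ABS_PINNED);
	}
	raw_spin_unlock_irqrestore(&s->lock, flags);
}

Because the restart path forwards the timer only when timer_active is clear, it can never forward a timer that is still enqueued, which is exactly the WARN the commit message describes.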
1 parent c78c881 commit 4cfafd3

File tree

6 files changed, +42 and -33 lines changed


include/linux/perf_event.h

Lines changed: 4 additions & 0 deletions
@@ -566,8 +566,12 @@ struct perf_cpu_context {
 	struct perf_event_context	*task_ctx;
 	int				active_oncpu;
 	int				exclusive;
+
+	raw_spinlock_t			hrtimer_lock;
 	struct hrtimer			hrtimer;
 	ktime_t				hrtimer_interval;
+	unsigned int			hrtimer_active;
+
 	struct pmu			*unique_pmu;
 	struct perf_cgroup		*cgrp;
 };

kernel/events/core.c

Lines changed: 16 additions & 13 deletions
@@ -752,24 +752,21 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
 {
 	struct perf_cpu_context *cpuctx;
-	enum hrtimer_restart ret = HRTIMER_NORESTART;
 	int rotations = 0;
 
 	WARN_ON(!irqs_disabled());
 
 	cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
-
 	rotations = perf_rotate_context(cpuctx);
 
-	/*
-	 * arm timer if needed
-	 */
-	if (rotations) {
+	raw_spin_lock(&cpuctx->hrtimer_lock);
+	if (rotations)
 		hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
-		ret = HRTIMER_RESTART;
-	}
+	else
+		cpuctx->hrtimer_active = 0;
+	raw_spin_unlock(&cpuctx->hrtimer_lock);
 
-	return ret;
+	return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
 }
 
 static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
@@ -792,23 +789,29 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+	raw_spin_lock_init(&cpuctx->hrtimer_lock);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	timer->function = perf_mux_hrtimer_handler;
 }
 
 static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
 {
 	struct hrtimer *timer = &cpuctx->hrtimer;
 	struct pmu *pmu = cpuctx->ctx.pmu;
+	unsigned long flags;
 
 	/* not for SW PMU */
 	if (pmu->task_ctx_nr == perf_sw_context)
 		return 0;
 
-	if (hrtimer_is_queued(timer))
-		return 0;
+	raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
+	if (!cpuctx->hrtimer_active) {
+		cpuctx->hrtimer_active = 1;
+		hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+	}
+	raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 
-	hrtimer_start(timer, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED);
 	return 0;
 }
 
kernel/sched/core.c

Lines changed: 0 additions & 12 deletions
@@ -90,18 +90,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
-{
-	/*
-	 * Do not forward the expiration time of active timers;
-	 * we do not want to loose an overrun.
-	 */
-	if (!hrtimer_active(period_timer))
-		hrtimer_forward_now(period_timer, period);
-
-	hrtimer_start_expires(period_timer, HRTIMER_MODE_ABS_PINNED);
-}
-
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
kernel/sched/fair.c

Lines changed: 13 additions & 4 deletions
@@ -3870,8 +3870,9 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
 	if (runtime_refresh_within(cfs_b, min_left))
 		return;
 
-	start_bandwidth_timer(&cfs_b->slack_timer,
-				ns_to_ktime(cfs_bandwidth_slack_period));
+	hrtimer_start(&cfs_b->slack_timer,
+			ns_to_ktime(cfs_bandwidth_slack_period),
+			HRTIMER_MODE_REL);
 }
 
 /* we know any runtime found here is valid as update_curr() precedes return */
@@ -4012,6 +4013,8 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 
 		idle = do_sched_cfs_period_timer(cfs_b, overrun);
 	}
+	if (idle)
+		cfs_b->period_active = 0;
 	raw_spin_unlock(&cfs_b->lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -4025,7 +4028,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
-	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	cfs_b->period_timer.function = sched_cfs_period_timer;
 	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->slack_timer.function = sched_cfs_slack_timer;
@@ -4039,7 +4042,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
-	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+	lockdep_assert_held(&cfs_b->lock);
+
+	if (!cfs_b->period_active) {
+		cfs_b->period_active = 1;
+		hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+		hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
+	}
 }
 
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)

kernel/sched/rt.c

Lines changed: 7 additions & 1 deletion
@@ -31,6 +31,8 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 		idle = do_sched_rt_period_timer(rt_b, overrun);
 		raw_spin_lock(&rt_b->rt_runtime_lock);
 	}
+	if (idle)
+		rt_b->rt_period_active = 0;
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -54,7 +56,11 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		return;
 
 	raw_spin_lock(&rt_b->rt_runtime_lock);
-	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+	if (!rt_b->rt_period_active) {
+		rt_b->rt_period_active = 1;
+		hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 

kernel/sched/sched.h

Lines changed: 2 additions & 3 deletions
@@ -131,6 +131,7 @@ struct rt_bandwidth {
 	ktime_t			rt_period;
 	u64			rt_runtime;
 	struct hrtimer		rt_period_timer;
+	unsigned int		rt_period_active;
 };
 
 void __dl_clear_params(struct task_struct *p);
@@ -215,7 +216,7 @@ struct cfs_bandwidth {
 	s64			hierarchical_quota;
 	u64			runtime_expires;
 
-	int			idle;
+	int			idle, period_active;
 	struct hrtimer		period_timer, slack_timer;
 	struct list_head	throttled_cfs_rq;
 
@@ -1406,8 +1407,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
 static inline void sched_avg_update(struct rq *rq) { }
 #endif
 
-extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
-
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
