
Commit 5680d80

Authored by Peter Zijlstra, committed by Ingo Molnar
sched/clock: Provide better clock continuity
When switching between the unstable and stable variants it is currently possible that clock discontinuities occur. And while these will mostly be 'small', attempt to do better.

As observed on my IVB-EP, the sched_clock() is ~1.5s ahead of the ktime_get_ns() based timeline at the point of switchover (sched_clock_init_late()) after SMP bringup.

Equally, when the TSC is later found to be unstable -- typically because SMM tries to hide its SMI latencies by mucking with the TSC -- we want to avoid large jumps.

Since the clocksource watchdog reports the issue after the fact we cannot exactly fix up time, but since SMI latencies are typically small (~10ns range), the discontinuity is mainly due to drift between sched_clock() and ktime_get_ns() (which on my desktop is ~79s over 24days).

I dislike this patch because it adds overhead to the good case in favour of dealing with badness. But given the widespread failure of TSC stability this is worth it.

Note that in case the TSC makes drastic jumps after SMP bringup we're still hosed. There's just not much we can do in that case without stupid overhead.

If we were to somehow expose tsc_clocksource_reliable (which is hard because this code is also used on ia64 and parisc) we could avoid some of the newly introduced overhead.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Signed-off-by: Ingo Molnar <[email protected]>
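
The key idea is the invariant stated in the new comment: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset. The following is a minimal userspace sketch, not part of the patch, with invented tick values, showing how raw_offset is chosen at the unstable->stable switchover (and gtod_offset at the reverse transition) so that the reported clock does not jump:

#include <stdio.h>
#include <stdint.h>

static uint64_t raw_offset;	/* added to sched_clock() on the stable path */
static uint64_t gtod_offset;	/* added to the GTOD base on the unstable path */

int main(void)
{
	/* Last tick snapshot, as kept per CPU in struct sched_clock_data. */
	uint64_t tick_raw  = 11500000000ull;	/* sched_clock()  ~ 11.5s */
	uint64_t tick_gtod = 10000000000ull;	/* ktime_get_ns() ~ 10.0s */

	/*
	 * unstable -> stable (cf. __set_sched_clock_stable()): pick raw_offset
	 * so that sched_clock() + raw_offset continues the GTOD-based timeline.
	 * Unsigned wrap-around handles tick_raw being ahead of tick_gtod.
	 */
	raw_offset = (tick_gtod + gtod_offset) - tick_raw;

	uint64_t unstable_val = tick_gtod + gtod_offset;	/* old timeline */
	uint64_t stable_val   = tick_raw + raw_offset;		/* new timeline */
	printf("switchover: %llu -> %llu, continuous: %s\n",
	       (unsigned long long)unstable_val,
	       (unsigned long long)stable_val,
	       unstable_val == stable_val ? "yes" : "no");

	/*
	 * stable -> unstable (cf. __clear_sched_clock_stable()): the reverse,
	 * fold the accumulated difference into gtod_offset.
	 */
	gtod_offset = (tick_raw + raw_offset) - tick_gtod;
	printf("gtod_offset after marking unstable: %llu\n",
	       (unsigned long long)gtod_offset);

	return 0;
}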
1 parent 9881b02 commit 5680d80


kernel/sched/clock.c

Lines changed: 65 additions & 34 deletions
@@ -86,13 +86,48 @@ void sched_clock_init(void)
 static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
 static int __sched_clock_stable_early;

+/*
+ * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ */
+static __read_mostly u64 raw_offset;
+static __read_mostly u64 gtod_offset;
+
+struct sched_clock_data {
+	u64			tick_raw;
+	u64			tick_gtod;
+	u64			clock;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
+
+static inline struct sched_clock_data *this_scd(void)
+{
+	return this_cpu_ptr(&sched_clock_data);
+}
+
+static inline struct sched_clock_data *cpu_sdc(int cpu)
+{
+	return &per_cpu(sched_clock_data, cpu);
+}
+
 int sched_clock_stable(void)
 {
 	return static_branch_likely(&__sched_clock_stable);
 }

 static void __set_sched_clock_stable(void)
 {
+	struct sched_clock_data *scd = this_scd();
+
+	/*
+	 * Attempt to make the (initial) unstable->stable transition continuous.
+	 */
+	raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+
+	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
+			scd->tick_gtod, gtod_offset,
+			scd->tick_raw,  raw_offset);
+
 	static_branch_enable(&__sched_clock_stable);
 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
@@ -117,7 +152,23 @@ void set_sched_clock_stable(void)

 static void __clear_sched_clock_stable(struct work_struct *work)
 {
-	/* XXX worry about clock continuity */
+	struct sched_clock_data *scd = this_scd();
+
+	/*
+	 * Attempt to make the stable->unstable transition continuous.
+	 *
+	 * Trouble is, this is typically called from the TSC watchdog
+	 * timer, which is late per definition. This means the tick
+	 * values can already be screwy.
+	 *
+	 * Still do what we can.
+	 */
+	gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+
+	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
+			scd->tick_gtod, gtod_offset,
+			scd->tick_raw, raw_offset);
+
 	static_branch_disable(&__sched_clock_stable);
 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
@@ -134,28 +185,9 @@ void clear_sched_clock_stable(void)
 	schedule_work(&sched_clock_work);
 }

-struct sched_clock_data {
-	u64			tick_raw;
-	u64			tick_gtod;
-	u64			clock;
-};
-
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
-
-static inline struct sched_clock_data *this_scd(void)
-{
-	return this_cpu_ptr(&sched_clock_data);
-}
-
-static inline struct sched_clock_data *cpu_sdc(int cpu)
-{
-	return &per_cpu(sched_clock_data, cpu);
-}
-
 void sched_clock_init_late(void)
 {
 	sched_clock_running = 2;
-
 	/*
 	 * Ensure that it is impossible to not do a static_key update.
 	 *
@@ -210,7 +242,7 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
 	 *		      scd->tick_gtod + TICK_NSEC);
 	 */

-	clock = scd->tick_gtod + delta;
+	clock = scd->tick_gtod + gtod_offset + delta;
 	min_clock = wrap_max(scd->tick_gtod, old_clock);
 	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);

@@ -296,7 +328,7 @@ u64 sched_clock_cpu(int cpu)
 	u64 clock;

 	if (sched_clock_stable())
-		return sched_clock();
+		return sched_clock() + raw_offset;

 	if (unlikely(!sched_clock_running))
 		return 0ull;
@@ -317,23 +349,22 @@ EXPORT_SYMBOL_GPL(sched_clock_cpu);
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd;
-	u64 now, now_gtod;
-
-	if (sched_clock_stable())
-		return;
-
-	if (unlikely(!sched_clock_running))
-		return;

 	WARN_ON_ONCE(!irqs_disabled());

+	/*
+	 * Update these values even if sched_clock_stable(), because it can
+	 * become unstable at any point in time at which point we need some
+	 * values to fall back on.
+	 *
+	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+	 */
 	scd = this_scd();
-	now_gtod = ktime_to_ns(ktime_get());
-	now = sched_clock();
+	scd->tick_raw = sched_clock();
+	scd->tick_gtod = ktime_get_ns();

-	scd->tick_raw = now;
-	scd->tick_gtod = now_gtod;
-	sched_clock_local(scd);
+	if (!sched_clock_stable() && likely(sched_clock_running))
+		sched_clock_local(scd);
 }

 /*
