Commit 19035e5

Merge branch 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  timers: Logic to move non pinned timers
  timers: /proc/sys sysctl hook to enable timer migration
  timers: Identifying the existing pinned timers
  timers: Framework for identifying pinned timers
  timers: allow deferrable timers for intervals tv2-tv5 to be deferred

Fix up conflicts in kernel/sched.c and kernel/timer.c manually
2 parents f9db6e0 + eea08f3 commit 19035e5
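
Reviewer note: a minimal sketch (not part of this merge) of the two timer flavours this series distinguishes. An ordinary mod_timer() call may now be migrated to the nohz load-balancer CPU when the arming CPU is idle and the new timer_migration sysctl is enabled, while mod_timer_pinned() keeps the timer on the CPU that armed it (as the UV heartbeat change below does). The driver names here are illustrative only.

/* Hypothetical driver snippet illustrating migratable vs. pinned timers. */
#include <linux/timer.h>
#include <linux/jiffies.h>

static void my_poll_fn(unsigned long data);
static DEFINE_TIMER(my_poll_timer, my_poll_fn, 0, 0);

static void my_poll_fn(unsigned long data)
{
        /* Migratable re-arm: may be queued on another, busier CPU
         * when this CPU is idle and timer migration is enabled. */
        mod_timer(&my_poll_timer, jiffies + HZ);

        /* Pinned re-arm: always stays on the CPU that armed it,
         * e.g. for per-CPU state such as a heartbeat:
         *
         *      mod_timer_pinned(&my_poll_timer, jiffies + HZ);
         */
}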

12 files changed (+163, -21 lines)

arch/x86/kernel/apic/x2apic_uv_x.c

Lines changed: 1 addition & 1 deletion
@@ -463,7 +463,7 @@ static void uv_heartbeat(unsigned long ignored)
 	uv_set_scir_bits(bits);
 
 	/* enable next timer period */
-	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+	mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
 }
 
 static void __cpuinit uv_heartbeat_enable(int cpu)

include/linux/clockchips.h

Lines changed: 9 additions & 0 deletions
@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg);
 #endif
 
 #endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+extern ktime_t clockevents_get_next_event(int cpu);
+#else
+static inline ktime_t clockevents_get_next_event(int cpu)
+{
+	return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif

include/linux/hrtimer.h

Lines changed: 5 additions & 2 deletions
@@ -30,8 +30,11 @@ struct hrtimer_cpu_base;
  * Mode arguments of xxx_hrtimer functions:
  */
 enum hrtimer_mode {
-	HRTIMER_MODE_ABS,	/* Time value is absolute */
-	HRTIMER_MODE_REL,	/* Time value is relative to now */
+	HRTIMER_MODE_ABS = 0x0,		/* Time value is absolute */
+	HRTIMER_MODE_REL = 0x1,		/* Time value is relative to now */
+	HRTIMER_MODE_PINNED = 0x02,	/* Timer is bound to CPU */
+	HRTIMER_MODE_ABS_PINNED = 0x02,
+	HRTIMER_MODE_REL_PINNED = 0x03,
 };
 
 /*
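
A hedged usage sketch of the new pinned hrtimer modes (function and variable names below are illustrative, not from this merge): HRTIMER_MODE_REL_PINNED is simply HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, and keeps the hrtimer on the arming CPU regardless of the timer_migration sysctl, as the sched.c and tick-sched.c hunks below do.

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer my_hrtimer;	/* hypothetical per-CPU watchdog */

static enum hrtimer_restart my_hrtimer_fn(struct hrtimer *t)
{
        /* ... per-CPU work ... */
        return HRTIMER_NORESTART;
}

static void my_hrtimer_arm(void)
{
        hrtimer_init(&my_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        my_hrtimer.function = my_hrtimer_fn;

        /* Relative, pinned: expire in 10 ms on this CPU, never migrated. */
        hrtimer_start(&my_hrtimer, ktime_set(0, 10 * NSEC_PER_MSEC),
                      HRTIMER_MODE_REL_PINNED);
}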

include/linux/sched.h

Lines changed: 13 additions & 0 deletions
@@ -261,6 +261,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
+extern int get_nohz_load_balancer(void);
 #else
 static inline int select_nohz_load_balancer(int cpu)
 {
@@ -1796,11 +1797,23 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;

include/linux/timer.h

Lines changed: 3 additions & 0 deletions
@@ -163,7 +163,10 @@ extern void add_timer_on(struct timer_list *timer, int cpu);
 extern int del_timer(struct timer_list * timer);
 extern int mod_timer(struct timer_list *timer, unsigned long expires);
 extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
+extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
 
+#define TIMER_NOT_PINNED	0
+#define TIMER_PINNED		1
 /*
  * The jiffies value which is added to now, when there is no timer
  * in the timer wheel:

kernel/hrtimer.c

Lines changed: 53 additions & 5 deletions
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
  * Switch the timer base to the current CPU when possible.
  */
 static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+		    int pinned)
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
+	int cpu, preferred_cpu = -1;
+
+	cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		preferred_cpu = get_nohz_load_balancer();
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
 
-	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
 	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
 		timer->base = NULL;
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
+
+		/* Optimized away for NOHZ=n SMP=n */
+		if (cpu == preferred_cpu) {
+			/* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+							new_base->offset);
+#else
+			ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+			/*
+			 * Get the next event on target cpu from the
+			 * clock events layer.
+			 * This covers the highres=off nohz=on case as well.
+			 */
+			ktime_t next = clockevents_get_next_event(cpu);
+
+			ktime_t delta = ktime_sub(expires, next);
+
+			/*
+			 * We do not migrate the timer when it is expiring
+			 * before the next event on the target cpu because
+			 * we cannot reprogram the target cpu hardware and
+			 * we would cause it to fire late.
+			 */
+			if (delta.tv64 < 0) {
+				cpu = smp_processor_id();
+				spin_unlock(&new_base->cpu_base->lock);
+				spin_lock(&base->cpu_base->lock);
+				timer->base = base;
+				goto again;
+			}
+		}
 		timer->base = new_base;
 	}
 	return new_base;
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 		return base;
 }
 
-# define switch_hrtimer_base(t, b)	(b)
+# define switch_hrtimer_base(t, b, p)	(b)
 
 #endif /* !CONFIG_SMP */
 
@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	ret = remove_hrtimer(timer, base);
 
 	/* Switch the timer base, if necessary: */
-	new_base = switch_hrtimer_base(timer, base);
+	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	if (mode == HRTIMER_MODE_REL) {
+	if (mode & HRTIMER_MODE_REL) {
 		tim = ktime_add_safe(tim, new_base->get_time());
 		/*
 		 * CONFIG_TIME_LOW_RES is a temporary way for architectures

kernel/sched.c

Lines changed: 9 additions & 2 deletions
@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		hard = hrtimer_get_expires(&rt_b->rt_period_timer);
 		delta = ktime_to_ns(ktime_sub(hard, soft));
 		__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
-				HRTIMER_MODE_ABS, 0);
+				HRTIMER_MODE_ABS_PINNED, 0);
 	}
 	spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void)
 static void hrtick_start(struct rq *rq, u64 delay)
 {
 	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
-			HRTIMER_MODE_REL, 0);
+			HRTIMER_MODE_REL_PINNED, 0);
 }
 
 static inline void init_hrtick(void)
@@ -4397,6 +4397,11 @@ static struct {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+	return atomic_read(&nohz.load_balancer);
+}
+
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 /**
  * lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void)
 }
 #endif /* CONFIG_SMP */
 
+const_debug unsigned int sysctl_timer_migration = 1;
+
 int in_sched_functions(unsigned long addr)
 {
 	return in_lock_functions(addr) ||

kernel/sysctl.c

Lines changed: 8 additions & 0 deletions
@@ -328,6 +328,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "timer_migration",
+		.data		= &sysctl_timer_migration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
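
Since this entry is added to kern_table, it should surface as /proc/sys/kernel/timer_migration (an integer, 0 or 1, default 1 per the sched.c hunk above). A minimal userspace sketch, assuming that path, for turning migration off:

#include <stdio.h>

int main(void)
{
        /* Assumed path: "timer_migration" registered in the "kernel"
         * sysctl directory by the hunk above. */
        FILE *f = fopen("/proc/sys/kernel/timer_migration", "w");

        if (!f)
                return 1;
        fputs("0\n", f);        /* 0 = timers stay on the CPU that armed them */
        return fclose(f) ? 1 : 0;
}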

kernel/time/clockevents.c

Lines changed: 12 additions & 0 deletions
@@ -18,6 +18,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -253,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg)
 	spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+	struct tick_device *td;
+	struct clock_event_device *dev;
+
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	return dev->next_event;
+}
 #endif

kernel/time/tick-sched.c

Lines changed: 4 additions & 3 deletions
@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 			if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 				hrtimer_start(&ts->sched_timer, expires,
-					      HRTIMER_MODE_ABS);
+					      HRTIMER_MODE_ABS_PINNED);
 				/* Check, if the timer was already in the past */
 				if (hrtimer_active(&ts->sched_timer))
 					goto out;
@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start_expires(&ts->sched_timer,
-					      HRTIMER_MODE_ABS);
+					      HRTIMER_MODE_ABS_PINNED);
 			/* Check, if the timer was already in the past */
 			if (hrtimer_active(&ts->sched_timer))
 				break;
@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void)
 
 	for (;;) {
 		hrtimer_forward(&ts->sched_timer, now, tick_period);
-		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
+		hrtimer_start_expires(&ts->sched_timer,
+				      HRTIMER_MODE_ABS_PINNED);
 		/* Check, if the timer was already in the past */
 		if (hrtimer_active(&ts->sched_timer))
 			break;

kernel/timer.c

Lines changed: 44 additions & 7 deletions
@@ -38,6 +38,7 @@
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
 #include <linux/perf_counter.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 }
 
 static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires,
+						bool pending_only, int pinned)
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret;
-
-	ret = 0;
+	int ret = 0 , cpu;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 
 	new_base = __get_cpu_var(tvec_bases);
 
+	cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
+	new_base = per_cpu(tvec_bases, cpu);
+
 	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
@@ -669,7 +681,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
  */
 int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-	return __mod_timer(timer, expires, true);
+	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
@@ -703,10 +715,32 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer->expires == expires && timer_pending(timer))
 		return 1;
 
-	return __mod_timer(timer, expires, false);
+	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
 
+/**
+ * mod_timer_pinned - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pinned() is a way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * and not allow the timer to be migrated to a different CPU.
+ *
+ * mod_timer_pinned(timer, expires) is equivalent to:
+ *
+ *     del_timer(timer); timer->expires = expires; add_timer(timer);
+ */
+int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
+{
+	if (timer->expires == expires && timer_pending(timer))
+		return 1;
+
+	return __mod_timer(timer, expires, false, TIMER_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pinned);
+
 /**
  * add_timer - start a timer
  * @timer: the timer to be added
@@ -1017,6 +1051,9 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
 		index = slot = timer_jiffies & TVN_MASK;
 		do {
 			list_for_each_entry(nte, varp->vec + slot, entry) {
+				if (tbase_get_deferrable(nte->base))
+					continue;
+
 				found = 1;
 				if (time_before(nte->expires, expires))
 					expires = nte->expires;
@@ -1307,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout)
 	expire = timeout + jiffies;
 
 	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire, false);
+	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
 	schedule();
 	del_singleshot_timer_sync(&timer);
 
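
The __next_timer_interrupt() hunk above is what makes the last patch in the series ("timers: allow deferrable timers for intervals tv2-tv5 to be deferred") effective: deferrable timers parked in the outer wheel levels no longer shorten the sleep length computed for an idle CPU. A hedged sketch of a driver marking such a timer deferrable (names are illustrative, not from this merge):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_housekeeping_timer;

static void my_housekeeping(unsigned long data)
{
        /* Non-critical cleanup; fine if it fires late on an idle CPU. */
        mod_timer(&my_housekeeping_timer, jiffies + 30 * HZ);
}

static void my_housekeeping_init(void)
{
        init_timer_deferrable(&my_housekeeping_timer);
        my_housekeeping_timer.function = my_housekeeping;
        my_housekeeping_timer.data = 0;

        /* 30 s out lands in an outer wheel level (tv2-tv5 depending on HZ);
         * with this change it no longer limits how long the CPU may sleep. */
        mod_timer(&my_housekeeping_timer, jiffies + 30 * HZ);
}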

kernel/trace/trace_sysprof.c

Lines changed: 2 additions & 1 deletion
@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
 
-	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
+	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+		      HRTIMER_MODE_REL_PINNED);
 }
 
 static void start_stack_timers(void)
