Skip to content

Commit eea08f3

Browse files
Arun R Bharadwaj authored and KAGA-KOKO (Thomas Gleixner) committed
timers: Logic to move non pinned timers
* Arun R Bharadwaj <[email protected]> [2009-04-16 12:11:36]: This patch migrates all non pinned timers and hrtimers to the current idle load balancer, from all the idle CPUs. Timers firing on busy CPUs are not migrated. While migrating hrtimers, care should be taken to check if migrating a hrtimer would result in a latency or not. So we compare the expiry of the hrtimer with the next timer interrupt on the target cpu and migrate the hrtimer only if it expires *after* the next interrupt on the target cpu. So, added a clockevents_get_next_event() helper function to return the next_event on the target cpu's clock_event_device. [ tglx: cleanups and simplifications ] Signed-off-by: Arun R Bharadwaj <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]>
1 parent cd1bb94 commit eea08f3

File tree

6 files changed

+101
-5
lines changed

6 files changed

+101
-5
lines changed

include/linux/clockchips.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg);
143143
#endif
144144

145145
#endif
146+
147+
#ifdef CONFIG_GENERIC_CLOCKEVENTS
148+
extern ktime_t clockevents_get_next_event(int cpu);
149+
#else
150+
static inline ktime_t clockevents_get_next_event(int cpu)
151+
{
152+
return (ktime_t) { .tv64 = KTIME_MAX };
153+
}
154+
#endif

include/linux/sched.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
257257
extern cpumask_var_t nohz_cpu_mask;
258258
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
259259
extern int select_nohz_load_balancer(int cpu);
260+
extern int get_nohz_load_balancer(void);
260261
#else
261262
static inline int select_nohz_load_balancer(int cpu)
262263
{
@@ -1772,6 +1773,17 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
17721773
struct file *file, void __user *buffer, size_t *length,
17731774
loff_t *ppos);
17741775
#endif
1776+
#ifdef CONFIG_SCHED_DEBUG
1777+
static inline unsigned int get_sysctl_timer_migration(void)
1778+
{
1779+
return sysctl_timer_migration;
1780+
}
1781+
#else
1782+
static inline unsigned int get_sysctl_timer_migration(void)
1783+
{
1784+
return 1;
1785+
}
1786+
#endif
17751787
extern unsigned int sysctl_sched_rt_period;
17761788
extern int sysctl_sched_rt_runtime;
17771789

kernel/hrtimer.c

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include <linux/seq_file.h>
4444
#include <linux/err.h>
4545
#include <linux/debugobjects.h>
46+
#include <linux/sched.h>
47+
#include <linux/timer.h>
4648

4749
#include <asm/uaccess.h>
4850

@@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
198200
{
199201
struct hrtimer_clock_base *new_base;
200202
struct hrtimer_cpu_base *new_cpu_base;
203+
int cpu, preferred_cpu = -1;
204+
205+
cpu = smp_processor_id();
206+
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
207+
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
208+
preferred_cpu = get_nohz_load_balancer();
209+
if (preferred_cpu >= 0)
210+
cpu = preferred_cpu;
211+
}
212+
#endif
201213

202-
new_cpu_base = &__get_cpu_var(hrtimer_bases);
214+
again:
215+
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
203216
new_base = &new_cpu_base->clock_base[base->index];
204217

205218
if (base != new_base) {
@@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
219232
timer->base = NULL;
220233
spin_unlock(&base->cpu_base->lock);
221234
spin_lock(&new_base->cpu_base->lock);
235+
236+
/* Optimized away for NOHZ=n SMP=n */
237+
if (cpu == preferred_cpu) {
238+
/* Calculate clock monotonic expiry time */
239+
#ifdef CONFIG_HIGH_RES_TIMERS
240+
ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
241+
new_base->offset);
242+
#else
243+
ktime_t expires = hrtimer_get_expires(timer);
244+
#endif
245+
246+
/*
247+
* Get the next event on target cpu from the
248+
* clock events layer.
249+
* This covers the highres=off nohz=on case as well.
250+
*/
251+
ktime_t next = clockevents_get_next_event(cpu);
252+
253+
ktime_t delta = ktime_sub(expires, next);
254+
255+
/*
256+
* We do not migrate the timer when it is expiring
257+
* before the next event on the target cpu because
258+
* we cannot reprogram the target cpu hardware and
259+
* we would cause it to fire late.
260+
*/
261+
if (delta.tv64 < 0) {
262+
cpu = smp_processor_id();
263+
spin_unlock(&new_base->cpu_base->lock);
264+
spin_lock(&base->cpu_base->lock);
265+
timer->base = base;
266+
goto again;
267+
}
268+
}
222269
timer->base = new_base;
223270
}
224271
return new_base;
@@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
236283
return base;
237284
}
238285

239-
# define switch_hrtimer_base(t, b) (b)
286+
# define switch_hrtimer_base(t, b, p) (b)
240287

241288
#endif /* !CONFIG_SMP */
242289

kernel/sched.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4244,6 +4244,11 @@ static struct {
42444244
.load_balancer = ATOMIC_INIT(-1),
42454245
};
42464246

4247+
int get_nohz_load_balancer(void)
4248+
{
4249+
return atomic_read(&nohz.load_balancer);
4250+
}
4251+
42474252
/*
42484253
* This routine will try to nominate the ilb (idle load balancing)
42494254
* owner among the cpus whose ticks are stopped. ilb owner will do the idle

kernel/time/clockevents.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <linux/notifier.h>
1919
#include <linux/smp.h>
2020
#include <linux/sysdev.h>
21+
#include <linux/tick.h>
2122

2223
/* The registered clock event devices */
2324
static LIST_HEAD(clockevent_devices);
@@ -251,4 +252,15 @@ void clockevents_notify(unsigned long reason, void *arg)
251252
spin_unlock(&clockevents_lock);
252253
}
253254
EXPORT_SYMBOL_GPL(clockevents_notify);
255+
256+
ktime_t clockevents_get_next_event(int cpu)
257+
{
258+
struct tick_device *td;
259+
struct clock_event_device *dev;
260+
261+
td = &per_cpu(tick_cpu_device, cpu);
262+
dev = td->evtdev;
263+
264+
return dev->next_event;
265+
}
254266
#endif

kernel/timer.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <linux/delay.h>
3838
#include <linux/tick.h>
3939
#include <linux/kallsyms.h>
40+
#include <linux/sched.h>
4041

4142
#include <asm/uaccess.h>
4243
#include <asm/unistd.h>
@@ -609,9 +610,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
609610
{
610611
struct tvec_base *base, *new_base;
611612
unsigned long flags;
612-
int ret;
613-
614-
ret = 0;
613+
int ret = 0 , cpu;
615614

616615
timer_stats_timer_set_start_info(timer);
617616
BUG_ON(!timer->function);
@@ -630,6 +629,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
630629

631630
new_base = __get_cpu_var(tvec_bases);
632631

632+
cpu = smp_processor_id();
633+
634+
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
635+
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
636+
int preferred_cpu = get_nohz_load_balancer();
637+
638+
if (preferred_cpu >= 0)
639+
cpu = preferred_cpu;
640+
}
641+
#endif
642+
new_base = per_cpu(tvec_bases, cpu);
643+
633644
if (base != new_base) {
634645
/*
635646
* We are trying to schedule the timer on the local CPU.

0 commit comments

Comments (0)