
Commit b18ec80

Mike Galbraith authored and Linus Torvalds committed
[PATCH] sched: improve migration accuracy
Co-opt rq->timestamp_last_tick to maintain a cache_hot_time evaluation reference timestamp at both tick and sched times, so that the reference (formerly rq->timestamp_last_tick) is never behind task->last_ran at evaluation time and sits closer to the current time on the remote processor. The intent is to improve cache-hot evaluation and timestamp-adjustment accuracy for task migration.

Also fix a minor sched_time double-accounting error that occurs when a task passes through schedule() without scheduling off and then takes the next timer tick.

[[email protected]: cleanup]
Signed-off-by: Mike Galbraith <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
Acked-by: Ken Chen <[email protected]>
Cc: Don Mullis <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 08c183f commit b18ec80
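Every cross-CPU use of the new reference in this patch follows the same rebasing idiom: a timestamp taken against one runqueue's clock is expressed as an offset from that runqueue's most_recent_timestamp and re-anchored to the other runqueue's reference, compensating for sched_clock() drift between CPUs. Below is a minimal user-space sketch of that idiom, not kernel code; only the field name most_recent_timestamp mirrors the patch, the struct and helper names are illustrative.

#include <stdio.h>

/* Illustrative stand-ins for struct rq and struct task_struct. */
struct rq_sketch {
	unsigned long long most_recent_timestamp;	/* per-CPU clock reference */
};

struct task_sketch {
	unsigned long long timestamp;	/* last event time, in the source CPU's clock */
};

/* Re-anchor p->timestamp from the source runqueue's clock to the destination's. */
static void rebase_timestamp(struct task_sketch *p,
			     const struct rq_sketch *src,
			     const struct rq_sketch *dst)
{
	p->timestamp = (p->timestamp - src->most_recent_timestamp)
			+ dst->most_recent_timestamp;
}

int main(void)
{
	struct rq_sketch src = { .most_recent_timestamp = 1000 };
	struct rq_sketch dst = { .most_recent_timestamp = 1300 };	/* clock drifted ahead */
	struct task_sketch p  = { .timestamp = 950 };	/* 50 units before src's reference */

	rebase_timestamp(&p, &src, &dst);
	printf("rebased timestamp = %llu\n", p.timestamp);	/* 1250: still 50 units behind */
	return 0;
}

The same arithmetic appears in the activate_task(), wake_up_new_task(), pull_task() and __migrate_task() hunks of the diff below.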

File tree: 1 file changed, +20 −21 lines


kernel/sched.c

Lines changed: 20 additions & 21 deletions
@@ -225,7 +225,8 @@ struct rq {
 	unsigned long nr_uninterruptible;
 
 	unsigned long expired_timestamp;
-	unsigned long long timestamp_last_tick;
+	/* Cached timestamp set by update_cpu_clock() */
+	unsigned long long most_recent_timestamp;
 	struct task_struct *curr, *idle;
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
@@ -944,8 +945,8 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local)
 	if (!local) {
 		/* Compensate for drifting sched_clock */
 		struct rq *this_rq = this_rq();
-		now = (now - this_rq->timestamp_last_tick)
-			+ rq->timestamp_last_tick;
+		now = (now - this_rq->most_recent_timestamp)
+			+ rq->most_recent_timestamp;
 	}
 #endif
 
@@ -1689,8 +1690,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * Not the local CPU - must adjust timestamp. This should
 		 * get optimised away in the !CONFIG_SMP case.
 		 */
-		p->timestamp = (p->timestamp - this_rq->timestamp_last_tick)
-					+ rq->timestamp_last_tick;
+		p->timestamp = (p->timestamp - this_rq->most_recent_timestamp)
+					+ rq->most_recent_timestamp;
 		__activate_task(p, rq);
 		if (TASK_PREEMPTS_CURR(p, rq))
 			resched_task(rq->curr);
@@ -2068,8 +2069,8 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array,
 	set_task_cpu(p, this_cpu);
 	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_array);
-	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
-				+ this_rq->timestamp_last_tick;
+	p->timestamp = (p->timestamp - src_rq->most_recent_timestamp)
+				+ this_rq->most_recent_timestamp;
 	/*
 	 * Note that idle threads have a prio of MAX_PRIO, for this test
 	 * to be always true for them.
@@ -2105,10 +2106,15 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 2) too many balance attempts have failed.
 	 */
 
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
+	if (sd->nr_balance_failed > sd->cache_nice_tries) {
+#ifdef CONFIG_SCHEDSTATS
+		if (task_hot(p, rq->most_recent_timestamp, sd))
+			schedstat_inc(sd, lb_hot_gained[idle]);
+#endif
 		return 1;
+	}
 
-	if (task_hot(p, rq->timestamp_last_tick, sd))
+	if (task_hot(p, rq->most_recent_timestamp, sd))
 		return 0;
 	return 1;
 }
@@ -2206,11 +2212,6 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			goto skip_bitmap;
 		}
 
-#ifdef CONFIG_SCHEDSTATS
-		if (task_hot(tmp, busiest->timestamp_last_tick, sd))
-			schedstat_inc(sd, lb_hot_gained[idle]);
-#endif
-
 		pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
 		pulled++;
 		rem_load_move -= tmp->load_weight;
@@ -2971,7 +2972,8 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 static inline void
 update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
 {
-	p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
+	p->sched_time += now - p->last_ran;
+	p->last_ran = rq->most_recent_timestamp = now;
 }
 
 /*
@@ -2984,8 +2986,7 @@ unsigned long long current_sched_time(const struct task_struct *p)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
-	ns = p->sched_time + sched_clock() - ns;
+	ns = p->sched_time + sched_clock() - p->last_ran;
 	local_irq_restore(flags);
 
 	return ns;
@@ -3176,8 +3177,6 @@ void scheduler_tick(void)
 
 	update_cpu_clock(p, rq, now);
 
-	rq->timestamp_last_tick = now;
-
 	if (p == rq->idle)
 		/* Task on the idle queue */
 		wake_priority_sleeper(rq);
@@ -5032,8 +5031,8 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	 * afterwards, and pretending it was a local activate.
 	 * This way is cleaner and logically correct.
	 */
-	p->timestamp = p->timestamp - rq_src->timestamp_last_tick
-		       + rq_dest->timestamp_last_tick;
+	p->timestamp = p->timestamp - rq_src->most_recent_timestamp
+		       + rq_dest->most_recent_timestamp;
 	deactivate_task(p, rq_src);
 	__activate_task(p, rq_dest);
 	if (TASK_PREEMPTS_CURR(p, rq_dest))
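The sched_time double-accounting fix mentioned in the commit message lives in the update_cpu_clock() hunk: time is charged as the delta since p->last_ran, and p->last_ran and rq->most_recent_timestamp are refreshed together at every accounting point (tick and schedule), so a task that passes through schedule() without switching and then takes the next timer tick is not charged the same interval twice. A small user-space sketch of that bookkeeping follows; field names mirror the patch, but the code is illustrative only and assumes a single monotonic clock.

#include <stdio.h>

/* Illustrative stand-ins for struct rq and struct task_struct. */
struct rq_sketch {
	unsigned long long most_recent_timestamp;
};

struct task_sketch {
	unsigned long long sched_time;	/* total time the task has run */
	unsigned long long last_ran;	/* clock value at the last accounting point */
};

/* Mirrors the new update_cpu_clock(): charge only the delta since last_ran,
 * then refresh the task and runqueue references together. */
static void account(struct task_sketch *p, struct rq_sketch *rq,
		    unsigned long long now)
{
	p->sched_time += now - p->last_ran;
	p->last_ran = rq->most_recent_timestamp = now;
}

int main(void)
{
	struct rq_sketch rq = { .most_recent_timestamp = 0 };
	struct task_sketch p = { .sched_time = 0, .last_ran = 100 };

	account(&p, &rq, 150);	/* schedule() that does not switch away */
	account(&p, &rq, 160);	/* next timer tick */
	printf("sched_time = %llu\n", p.sched_time);	/* 60: the first 50 units counted once */
	return 0;
}

Because the runqueue reference is now written in the same place as p->last_ran, it can no longer lag behind last_ran when task_hot() compares the elapsed time against sd->cache_hot_time, which is the accuracy improvement the commit message describes.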
