Commit c6e7bd7

Peter Zijlstra authored and Ingo Molnar committed
sched/core: Optimize ttwu() spinning on p->on_cpu
Both Rik and Mel reported seeing ttwu() spend significant time on:

    smp_cond_load_acquire(&p->on_cpu, !VAL);

Attempt to avoid this by queueing the wakeup on the CPU that owns the p->on_cpu value. This will then allow the ttwu() to complete without further waiting.

Since we run schedule() with interrupts disabled, the IPI is guaranteed to happen after p->on_cpu is cleared, this is what makes it safe to queue early.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Mel Gorman <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Cc: Jirka Hladky <[email protected]>
Cc: Vincent Guittot <[email protected]>
Cc: [email protected]
Cc: Hillf Danton <[email protected]>
Cc: Rik van Riel <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent d505b8a commit c6e7bd7
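As a reading aid, here is a condensed sketch of the wake-up fast path this commit introduces, stitched together from the diff below. It is illustrative only: it is not a compilable excerpt of kernel/sched/core.c, it omits the locking, statistics and tracing surrounding this code, and it uses no identifiers beyond those that appear in the diff itself.

/*
 * Illustrative sketch only, assembled from the hunks below; not a
 * compilable excerpt of kernel/sched/core.c.
 */
static bool ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
{
        /* Remote queueing is only worthwhile when the CPUs do not share a cache. */
        if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
                sched_clock_cpu(cpu); /* Sync clocks across CPUs */
                __ttwu_queue_remote(p, cpu, wake_flags); /* queue p on cpu's wake_list and kick it with an IPI */
                return true;
        }

        return false;
}

/* In try_to_wake_up(), after the smp_rmb() that orders the load of p->on_cpu
 * after p->on_rq (cpu == task_cpu(p) at this point): */

        /*
         * The old CPU may still be running schedule() with p as prev, i.e.
         * p->on_cpu != 0.  Instead of spinning until it clears, queue the
         * wakeup on that CPU.  schedule() runs with IRQs disabled, so the
         * IPI is handled only after p->on_cpu has been cleared.
         */
        if (READ_ONCE(p->on_cpu) && ttwu_queue_remote(p, cpu, wake_flags))
                goto unlock;

        /* Slow path, unchanged: wait for the previous CPU to drop p. */
        smp_cond_load_acquire(&p->on_cpu, !VAL);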

1 file changed: +31 −21 lines

kernel/sched/core.c

Lines changed: 31 additions & 21 deletions
@@ -2312,7 +2312,7 @@ static void wake_csd_func(void *info)
         sched_ttwu_pending();
 }
 
-static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
+static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
 {
         struct rq *rq = cpu_rq(cpu);
 
@@ -2354,6 +2354,17 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
 {
         return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
+
+static bool ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
+{
+        if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
+                sched_clock_cpu(cpu); /* Sync clocks across CPUs */
+                __ttwu_queue_remote(p, cpu, wake_flags);
+                return true;
+        }
+
+        return false;
+}
 #endif /* CONFIG_SMP */
 
 static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
@@ -2362,11 +2373,8 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
         struct rq_flags rf;
 
 #if defined(CONFIG_SMP)
-        if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
-                sched_clock_cpu(cpu); /* Sync clocks across CPUs */
-                ttwu_queue_remote(p, cpu, wake_flags);
+        if (ttwu_queue_remote(p, cpu, wake_flags))
                 return;
-        }
 #endif
 
         rq_lock(rq, &rf);
@@ -2548,7 +2556,15 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         if (p->on_rq && ttwu_remote(p, wake_flags))
                 goto unlock;
 
+        if (p->in_iowait) {
+                delayacct_blkio_end(p);
+                atomic_dec(&task_rq(p)->nr_iowait);
+        }
+
 #ifdef CONFIG_SMP
+        p->sched_contributes_to_load = !!task_contributes_to_load(p);
+        p->state = TASK_WAKING;
+
         /*
          * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
          * possible to, falsely, observe p->on_cpu == 0.
@@ -2570,6 +2586,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          */
         smp_rmb();
 
+        /*
+         * If the owning (remote) CPU is still in the middle of schedule() with
+         * this task as prev, considering queueing p on the remote CPUs wake_list
+         * which potentially sends an IPI instead of spinning on p->on_cpu to
+         * let the waker make forward progress. This is safe because IRQs are
+         * disabled and the IPI will deliver after on_cpu is cleared.
+         */
+        if (READ_ONCE(p->on_cpu) && ttwu_queue_remote(p, cpu, wake_flags))
+                goto unlock;
+
         /*
          * If the owning (remote) CPU is still in the middle of schedule() with
          * this task as prev, wait until its done referencing the task.
@@ -2581,28 +2607,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          */
         smp_cond_load_acquire(&p->on_cpu, !VAL);
 
-        p->sched_contributes_to_load = !!task_contributes_to_load(p);
-        p->state = TASK_WAKING;
-
-        if (p->in_iowait) {
-                delayacct_blkio_end(p);
-                atomic_dec(&task_rq(p)->nr_iowait);
-        }
-
         cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
         if (task_cpu(p) != cpu) {
                 wake_flags |= WF_MIGRATED;
                 psi_ttwu_dequeue(p);
                 set_task_cpu(p, cpu);
         }
-
-#else /* CONFIG_SMP */
-
-        if (p->in_iowait) {
-                delayacct_blkio_end(p);
-                atomic_dec(&task_rq(p)->nr_iowait);
-        }
-
 #endif /* CONFIG_SMP */
 
         ttwu_queue(p, cpu, wake_flags);
