Skip to content

Commit e4a52bc

Browse files
Peter Zijlstra authored and Ingo Molnar committed
sched: Remove rq->lock from the first half of ttwu()
Currently ttwu() does two rq->lock acquisitions, once on the task's old rq, holding it over the p->state fiddling and load-balance pass. Then it drops the old rq->lock to acquire the new rq->lock.

By having serialized ttwu(), p->sched_class, p->cpus_allowed with p->pi_lock, we can now drop the whole first rq->lock acquisition.

The p->pi_lock serializing concurrent ttwu() calls protects p->state, which we will set to TASK_WAKING to bridge possible p->pi_lock to rq->lock gaps and serialize set_task_cpu() calls against task_rq_lock().

The p->pi_lock serialization of p->sched_class allows us to call scheduling class methods without holding the rq->lock, and the serialization of p->cpus_allowed allows us to do the load-balancing bits without races.

Reviewed-by: Frank Rowand <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
1 parent 8f42ced commit e4a52bc

File tree

1 file changed

+37
-28
lines changed

1 file changed

+37
-28
lines changed

kernel/sched.c

Lines changed: 37 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -2493,69 +2493,78 @@ ttwu_post_activation(struct task_struct *p, struct rq *rq, int wake_flags)
24932493
* Returns %true if @p was woken up, %false if it was already running
24942494
* or @state didn't match @p's state.
24952495
*/
2496-
static int try_to_wake_up(struct task_struct *p, unsigned int state,
2497-
int wake_flags)
2496+
static int
2497+
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
24982498
{
2499-
int cpu, orig_cpu, this_cpu, success = 0;
2499+
int cpu, this_cpu, success = 0;
25002500
unsigned long flags;
2501-
unsigned long en_flags = ENQUEUE_WAKEUP;
25022501
struct rq *rq;
25032502

25042503
this_cpu = get_cpu();
25052504

25062505
smp_wmb();
25072506
raw_spin_lock_irqsave(&p->pi_lock, flags);
2508-
rq = __task_rq_lock(p);
25092507
if (!(p->state & state))
25102508
goto out;
25112509

25122510
cpu = task_cpu(p);
25132511

2514-
if (p->on_rq)
2515-
goto out_running;
2512+
if (p->on_rq) {
2513+
rq = __task_rq_lock(p);
2514+
if (p->on_rq)
2515+
goto out_running;
2516+
__task_rq_unlock(rq);
2517+
}
25162518

2517-
orig_cpu = cpu;
25182519
#ifdef CONFIG_SMP
2519-
if (unlikely(task_running(rq, p)))
2520-
goto out_activate;
2520+
while (p->on_cpu) {
2521+
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2522+
/*
2523+
* If called from interrupt context we could have landed in the
2524+
* middle of schedule(), in this case we should take care not
2525+
* to spin on ->on_cpu if p is current, since that would
2526+
* deadlock.
2527+
*/
2528+
if (p == current)
2529+
goto out_activate;
2530+
#endif
2531+
cpu_relax();
2532+
}
2533+
/*
2534+
* Pairs with the smp_wmb() in finish_lock_switch().
2535+
*/
2536+
smp_rmb();
25212537

25222538
p->sched_contributes_to_load = !!task_contributes_to_load(p);
25232539
p->state = TASK_WAKING;
25242540

2525-
if (p->sched_class->task_waking) {
2541+
if (p->sched_class->task_waking)
25262542
p->sched_class->task_waking(p);
2527-
en_flags |= ENQUEUE_WAKING;
2528-
}
25292543

25302544
cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2531-
if (cpu != orig_cpu)
2532-
set_task_cpu(p, cpu);
2533-
__task_rq_unlock(rq);
2545+
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2546+
out_activate:
2547+
#endif
2548+
#endif /* CONFIG_SMP */
25342549

25352550
rq = cpu_rq(cpu);
25362551
raw_spin_lock(&rq->lock);
25372552

2538-
/*
2539-
* We migrated the task without holding either rq->lock, however
2540-
* since the task is not on the task list itself, nobody else
2541-
* will try and migrate the task, hence the rq should match the
2542-
* cpu we just moved it to.
2543-
*/
2544-
WARN_ON(task_cpu(p) != cpu);
2545-
WARN_ON(p->state != TASK_WAKING);
2553+
#ifdef CONFIG_SMP
2554+
if (cpu != task_cpu(p))
2555+
set_task_cpu(p, cpu);
25462556

25472557
if (p->sched_contributes_to_load)
25482558
rq->nr_uninterruptible--;
2559+
#endif
25492560

2550-
out_activate:
2551-
#endif /* CONFIG_SMP */
2552-
ttwu_activate(rq, p, en_flags);
2561+
ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
25532562
out_running:
25542563
ttwu_post_activation(p, rq, wake_flags);
25552564
ttwu_stat(rq, p, cpu, wake_flags);
25562565
success = 1;
2557-
out:
25582566
__task_rq_unlock(rq);
2567+
out:
25592568
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
25602569
put_cpu();
25612570

0 commit comments

Comments
 (0)