
Commit 857d315

Peter Zijlstra authored and committed
sched: Simplify ttwu()
Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
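The guard()/scoped_guard() helpers used in this diff come from the kernel's scope-based cleanup infrastructure (include/linux/cleanup.h): a guard acquires its resource when declared and releases it automatically when the surrounding scope ends, so an early break out of a scoped_guard block takes the place of the old goto unlock path. Below is a minimal userspace sketch of the same mechanism; lock_t, lock(), unlock(), GUARD and SCOPED_GUARD are made-up stand-ins built on the compiler's cleanup attribute, not the kernel macros themselves.

	/* illustration only -- hypothetical names, not kernel code */
	#include <stdio.h>

	typedef struct { const char *name; } lock_t;

	static void lock(lock_t *l)   { printf("lock   %s\n", l->name); }
	static void unlock(lock_t *l) { printf("unlock %s\n", l->name); }

	/* cleanup callback: runs when the guarded variable leaves scope */
	static void unlock_cleanup(lock_t **l) { unlock(*l); }

	/* Rough stand-ins for guard()/scoped_guard(): take the lock on entry,
	 * drop it automatically when the scope ends, including via break/return. */
	#define GUARD(l) \
		lock_t *guard_##l __attribute__((cleanup(unlock_cleanup))) = (lock(&(l)), &(l))

	#define SCOPED_GUARD(l) \
		for (lock_t *scope_##l __attribute__((cleanup(unlock_cleanup))) = \
			(lock(&(l)), &(l)), *once_##l = NULL; !once_##l; once_##l = (lock_t *)1)

	static lock_t preempt = { "preempt" };
	static lock_t pi_lock = { "pi_lock" };

	static int wake(int already_runnable)
	{
		int success = 0;

		GUARD(preempt);			/* like guard(preempt)(): held until return */

		SCOPED_GUARD(pi_lock) {
			if (already_runnable)
				break;		/* pi_lock dropped here; no 'goto unlock' */
			success = 1;
		}				/* ...or dropped here on normal exit */

		return success;			/* 'preempt' released automatically */
	}

	int main(void)
	{
		printf("-> %d\n\n", wake(1));
		printf("-> %d\n", wake(0));
		return 0;
	}

Compiled with gcc or clang, wake(1) prints the lock being taken and then dropped around the early break, which is the control flow the scoped_guard conversion below preserves without a goto label.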
1 parent 4eb054f commit 857d315

File tree: 1 file changed, +109 −112 lines changed


kernel/sched/core.c

Lines changed: 109 additions & 112 deletions
@@ -3733,14 +3733,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 		struct sched_domain *sd;

 		__schedstat_inc(p->stats.nr_wakeups_remote);
-		rcu_read_lock();
+
+		guard(rcu)();
 		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				__schedstat_inc(sd->ttwu_wake_remote);
 				break;
 			}
 		}
-		rcu_read_unlock();
 	}

 	if (wake_flags & WF_MIGRATED)
@@ -4199,10 +4199,9 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 static int
 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-	unsigned long flags;
+	guard(preempt)();
 	int cpu, success = 0;

-	preempt_disable();
 	if (p == current) {
 		/*
 		 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4229,129 +4228,127 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * reordered with p->state check below. This pairs with smp_store_mb()
 	 * in set_current_state() that the waiting thread does.
 	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	smp_mb__after_spinlock();
-	if (!ttwu_state_match(p, state, &success))
-		goto unlock;
+	scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+		smp_mb__after_spinlock();
+		if (!ttwu_state_match(p, state, &success))
+			break;

-	trace_sched_waking(p);
+		trace_sched_waking(p);

-	/*
-	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
-	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
-	 * in smp_cond_load_acquire() below.
-	 *
-	 * sched_ttwu_pending()			try_to_wake_up()
-	 *   STORE p->on_rq = 1			  LOAD p->state
-	 *   UNLOCK rq->lock
-	 *
-	 * __schedule() (switch to task 'p')
-	 *   LOCK rq->lock			  smp_rmb();
-	 *   smp_mb__after_spinlock();
-	 *   UNLOCK rq->lock
-	 *
-	 * [task p]
-	 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
-	 *
-	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-	 * __schedule(). See the comment for smp_mb__after_spinlock().
-	 *
-	 * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
-	 */
-	smp_rmb();
-	if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
-		goto unlock;
+		/*
+		 * Ensure we load p->on_rq _after_ p->state, otherwise it would
+		 * be possible to, falsely, observe p->on_rq == 0 and get stuck
+		 * in smp_cond_load_acquire() below.
+		 *
+		 * sched_ttwu_pending()			try_to_wake_up()
+		 *   STORE p->on_rq = 1			  LOAD p->state
+		 *   UNLOCK rq->lock
+		 *
+		 * __schedule() (switch to task 'p')
+		 *   LOCK rq->lock			  smp_rmb();
+		 *   smp_mb__after_spinlock();
+		 *   UNLOCK rq->lock
+		 *
+		 * [task p]
+		 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
+		 *
+		 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+		 * __schedule(). See the comment for smp_mb__after_spinlock().
+		 *
+		 * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
+		 */
+		smp_rmb();
+		if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
+			break;

 #ifdef CONFIG_SMP
-	/*
-	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
-	 * possible to, falsely, observe p->on_cpu == 0.
-	 *
-	 * One must be running (->on_cpu == 1) in order to remove oneself
-	 * from the runqueue.
-	 *
-	 * __schedule() (switch to task 'p')	try_to_wake_up()
-	 *   STORE p->on_cpu = 1		  LOAD p->on_rq
-	 *   UNLOCK rq->lock
-	 *
-	 * __schedule() (put 'p' to sleep)
-	 *   LOCK rq->lock			  smp_rmb();
-	 *   smp_mb__after_spinlock();
-	 *   STORE p->on_rq = 0			  LOAD p->on_cpu
-	 *
-	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-	 * __schedule(). See the comment for smp_mb__after_spinlock().
-	 *
-	 * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
-	 * schedule()'s deactivate_task() has 'happened' and p will no longer
-	 * care about it's own p->state. See the comment in __schedule().
-	 */
-	smp_acquire__after_ctrl_dep();
+		/*
+		 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+		 * possible to, falsely, observe p->on_cpu == 0.
+		 *
+		 * One must be running (->on_cpu == 1) in order to remove oneself
+		 * from the runqueue.
+		 *
+		 * __schedule() (switch to task 'p')	try_to_wake_up()
+		 *   STORE p->on_cpu = 1		  LOAD p->on_rq
+		 *   UNLOCK rq->lock
+		 *
+		 * __schedule() (put 'p' to sleep)
+		 *   LOCK rq->lock			  smp_rmb();
+		 *   smp_mb__after_spinlock();
+		 *   STORE p->on_rq = 0			  LOAD p->on_cpu
+		 *
+		 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+		 * __schedule(). See the comment for smp_mb__after_spinlock().
+		 *
+		 * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
+		 * schedule()'s deactivate_task() has 'happened' and p will no longer
+		 * care about it's own p->state. See the comment in __schedule().
+		 */
+		smp_acquire__after_ctrl_dep();

-	/*
-	 * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
-	 * == 0), which means we need to do an enqueue, change p->state to
-	 * TASK_WAKING such that we can unlock p->pi_lock before doing the
-	 * enqueue, such as ttwu_queue_wakelist().
-	 */
-	WRITE_ONCE(p->__state, TASK_WAKING);
+		/*
+		 * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
+		 * == 0), which means we need to do an enqueue, change p->state to
+		 * TASK_WAKING such that we can unlock p->pi_lock before doing the
+		 * enqueue, such as ttwu_queue_wakelist().
+		 */
+		WRITE_ONCE(p->__state, TASK_WAKING);

-	/*
-	 * If the owning (remote) CPU is still in the middle of schedule() with
-	 * this task as prev, considering queueing p on the remote CPUs wake_list
-	 * which potentially sends an IPI instead of spinning on p->on_cpu to
-	 * let the waker make forward progress. This is safe because IRQs are
-	 * disabled and the IPI will deliver after on_cpu is cleared.
-	 *
-	 * Ensure we load task_cpu(p) after p->on_cpu:
-	 *
-	 * set_task_cpu(p, cpu);
-	 *   STORE p->cpu = @cpu
-	 * __schedule() (switch to task 'p')
-	 *   LOCK rq->lock
-	 *   smp_mb__after_spin_lock()		smp_cond_load_acquire(&p->on_cpu)
-	 *   STORE p->on_cpu = 1		LOAD p->cpu
-	 *
-	 * to ensure we observe the correct CPU on which the task is currently
-	 * scheduling.
-	 */
-	if (smp_load_acquire(&p->on_cpu) &&
-	    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
-		goto unlock;
+		/*
+		 * If the owning (remote) CPU is still in the middle of schedule() with
+		 * this task as prev, considering queueing p on the remote CPUs wake_list
+		 * which potentially sends an IPI instead of spinning on p->on_cpu to
+		 * let the waker make forward progress. This is safe because IRQs are
+		 * disabled and the IPI will deliver after on_cpu is cleared.
+		 *
+		 * Ensure we load task_cpu(p) after p->on_cpu:
+		 *
+		 * set_task_cpu(p, cpu);
+		 *   STORE p->cpu = @cpu
+		 * __schedule() (switch to task 'p')
+		 *   LOCK rq->lock
+		 *   smp_mb__after_spin_lock()		smp_cond_load_acquire(&p->on_cpu)
+		 *   STORE p->on_cpu = 1		LOAD p->cpu
+		 *
+		 * to ensure we observe the correct CPU on which the task is currently
+		 * scheduling.
+		 */
+		if (smp_load_acquire(&p->on_cpu) &&
+		    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+			break;

-	/*
-	 * If the owning (remote) CPU is still in the middle of schedule() with
-	 * this task as prev, wait until it's done referencing the task.
-	 *
-	 * Pairs with the smp_store_release() in finish_task().
-	 *
-	 * This ensures that tasks getting woken will be fully ordered against
-	 * their previous state and preserve Program Order.
-	 */
-	smp_cond_load_acquire(&p->on_cpu, !VAL);
+		/*
+		 * If the owning (remote) CPU is still in the middle of schedule() with
+		 * this task as prev, wait until it's done referencing the task.
+		 *
+		 * Pairs with the smp_store_release() in finish_task().
+		 *
+		 * This ensures that tasks getting woken will be fully ordered against
+		 * their previous state and preserve Program Order.
+		 */
+		smp_cond_load_acquire(&p->on_cpu, !VAL);

-	cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
-	if (task_cpu(p) != cpu) {
-		if (p->in_iowait) {
-			delayacct_blkio_end(p);
-			atomic_dec(&task_rq(p)->nr_iowait);
-		}
+		cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+		if (task_cpu(p) != cpu) {
+			if (p->in_iowait) {
+				delayacct_blkio_end(p);
+				atomic_dec(&task_rq(p)->nr_iowait);
+			}

-		wake_flags |= WF_MIGRATED;
-		psi_ttwu_dequeue(p);
-		set_task_cpu(p, cpu);
-	}
+			wake_flags |= WF_MIGRATED;
+			psi_ttwu_dequeue(p);
+			set_task_cpu(p, cpu);
+		}
 #else
-	cpu = task_cpu(p);
+		cpu = task_cpu(p);
 #endif /* CONFIG_SMP */

-	ttwu_queue(p, cpu, wake_flags);
-unlock:
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		ttwu_queue(p, cpu, wake_flags);
+	}
 out:
 	if (success)
 		ttwu_stat(p, task_cpu(p), wake_flags);
-	preempt_enable();

 	return success;
 }
