
Commit 6acce3e

Peter Zijlstra authored and Ingo Molnar committed
sched: Remove get_online_cpus() usage
Remove get_online_cpus() usage from the scheduler; there are 4 sites
that use it:

 - sched_init_smp(); where it's completely superfluous since we're in
   'early' boot and there simply cannot be any hotplugging.

 - sched_getaffinity(); we already take a raw spinlock to protect the
   task cpus_allowed mask, this disables preemption and therefore also
   stabilizes cpu_online_mask as that's modified using stop_machine.
   However switch to active mask for symmetry with
   sched_setaffinity()/set_cpus_allowed_ptr(). We guarantee active
   mask stability by inserting sync_rcu/sched() into _cpu_down.

 - sched_setaffinity(); we don't appear to need get_online_cpus()
   either, there are two sites where hotplug appears relevant:
    * cpuset_cpus_allowed(); for the !cpuset case we use possible_mask,
      for the cpuset case we hold task_lock, which is a spinlock and
      thus for mainline disables preemption (might cause pain on RT).
    * set_cpus_allowed_ptr(); holds all scheduler locks and thus has
      preemption properly disabled; also it already deals with hotplug
      races explicitly where it releases them.

 - migrate_swap(); we can make stop_two_cpus() do the heavy lifting for
   us with a little trickery.

   By adding a sync_sched/rcu() after the CPU_DOWN_PREPARE notifier we
   can provide preempt/rcu guarantees for cpu_active_mask.

   Use these to validate that both our cpus are active when queueing
   the stop work before we queue the stop_machine works for
   take_cpu_down().

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: "Srivatsa S. Bhat" <[email protected]>
Cc: Paul McKenney <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
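Editor's annotation: a rough sketch of the guarantee the changelog relies on, with made-up helper names and not code from this patch. Once the hotplug path has marked the CPU inactive at CPU_DOWN_PREPARE time and _cpu_down() has run both synchronize primitives, a reader that merely disables preemption (or holds rcu_read_lock()) sees a stable cpu_active() result, so get_online_cpus() is unnecessary at these sites.

#include <linux/cpumask.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>

/* Hotplug side (schematic): the CPU is marked inactive during
 * CPU_DOWN_PREPARE, then _cpu_down() flushes all existing readers.
 */
static void example_deactivate(unsigned int cpu)
{
        set_cpu_active(cpu, false);
#ifdef CONFIG_PREEMPT
        synchronize_sched();    /* wait out preempt-disabled readers */
#endif
        synchronize_rcu();      /* wait out rcu_read_lock() readers */
        /* every new preempt/RCU reader now sees !cpu_active(cpu) */
}

/* Reader side (schematic): preemption off is enough to stabilize the
 * answer; no get_online_cpus() required.
 */
static bool example_cpu_usable(unsigned int cpu)
{
        bool active;

        preempt_disable();
        active = cpu_active(cpu);
        /* ... work that must not race with the CPU going inactive ... */
        preempt_enable();

        return active;
}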
1 parent 7460231 commit 6acce3e

3 files changed: +48, -15 lines


kernel/cpu.c

Lines changed: 17 additions & 0 deletions
@@ -308,6 +308,23 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	}
 	smpboot_park_threads(cpu);
 
+	/*
+	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
+	 * and RCU users of this state to go away such that all new such users
+	 * will observe it.
+	 *
+	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
+	 * not imply sync_sched(), so explicitly call both.
+	 */
+#ifdef CONFIG_PREEMPT
+	synchronize_sched();
+#endif
+	synchronize_rcu();
+
+	/*
+	 * So now all preempt/rcu users must observe !cpu_active().
+	 */
+
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		/* CPU didn't die: tell everyone. Can't complain. */
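Editor's annotation: the hunk above calls both primitives because, with preemptible RCU, the two read-side flavors are flushed by different grace periods. A minimal, hypothetical illustration of the two kinds of readers being waited for (not code from the patch):

#include <linux/cpumask.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>

/* Flushed by synchronize_sched(): a preempt-disabled section. */
static bool reader_preempt_off(unsigned int cpu)
{
        bool active;

        preempt_disable();
        active = cpu_active(cpu);
        preempt_enable();

        return active;
}

/*
 * Flushed by synchronize_rcu(): an rcu_read_lock() section, which under
 * CONFIG_PREEMPT may itself be preempted and is therefore not
 * necessarily covered by synchronize_sched() alone.
 */
static bool reader_rcu(unsigned int cpu)
{
        bool active;

        rcu_read_lock();
        active = cpu_active(cpu);
        rcu_read_unlock();

        return active;
}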

kernel/sched/core.c

Lines changed: 10 additions & 10 deletions
@@ -1085,8 +1085,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
 	struct migration_swap_arg arg;
 	int ret = -EINVAL;
 
-	get_online_cpus();
-
 	arg = (struct migration_swap_arg){
 		.src_task = cur,
 		.src_cpu = task_cpu(cur),
@@ -1097,6 +1095,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
 	if (arg.src_cpu == arg.dst_cpu)
 		goto out;
 
+	/*
+	 * These three tests are all lockless; this is OK since all of them
+	 * will be re-checked with proper locks held further down the line.
+	 */
 	if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
 		goto out;
 
@@ -1109,7 +1111,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
 	ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
 
 out:
-	put_online_cpus();
 	return ret;
 }
 
@@ -3710,7 +3711,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	struct task_struct *p;
 	int retval;
 
-	get_online_cpus();
 	rcu_read_lock();
 
 	p = find_process_by_pid(pid);
@@ -3773,7 +3773,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	free_cpumask_var(cpus_allowed);
 out_put_task:
 	put_task_struct(p);
-	put_online_cpus();
 	return retval;
 }
 
@@ -3818,7 +3817,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	unsigned long flags;
 	int retval;
 
-	get_online_cpus();
 	rcu_read_lock();
 
 	retval = -ESRCH;
@@ -3831,12 +3829,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 		goto out_unlock;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
+	cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 out_unlock:
 	rcu_read_unlock();
-	put_online_cpus();
 
 	return retval;
 }
@@ -6494,14 +6491,17 @@ void __init sched_init_smp(void)
 
 	sched_init_numa();
 
-	get_online_cpus();
+	/*
+	 * There's no userspace yet to cause hotplug operations; hence all the
+	 * cpu masks are stable and all blatant races in the below code cannot
+	 * happen.
+	 */
 	mutex_lock(&sched_domains_mutex);
 	init_sched_domains(cpu_active_mask);
 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
 	if (cpumask_empty(non_isolated_cpus))
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
-	put_online_cpus();
 
 	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
 	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
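Editor's annotation: the migrate_swap() comment above captures the pattern these hunks lean on: cheap lockless checks up front, with the locked paths further down (pi_lock, the scheduler locks, stop_two_cpus()) re-validating before anything irreversible happens. A simplified, hypothetical sketch of that shape, not the patch's code:

#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

/* Hypothetical illustration of "lockless check, re-check under lock". */
static int example_target_check(struct task_struct *p, int dst_cpu)
{
        unsigned long flags;
        int ret = -EINVAL;

        /* Lockless: may be stale, but a stale answer only causes an
         * early bail-out, never an unsafe operation.
         */
        if (!cpu_active(dst_cpu))
                return -EINVAL;

        /* Authoritative: pi_lock disables preemption, so the sync_sched()
         * added to _cpu_down() guarantees cpu_active_mask cannot change
         * under us, and p->cpus_allowed is stable as well.
         */
        raw_spin_lock_irqsave(&p->pi_lock, flags);
        if (cpu_active(dst_cpu) && cpumask_test_cpu(dst_cpu, &p->cpus_allowed))
                ret = 0;        /* safe to do the real work here */
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);

        return ret;
}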

kernel/stop_machine.c

Lines changed: 21 additions & 5 deletions
@@ -234,11 +234,13 @@ static void irq_cpu_stop_queue_work(void *arg)
  */
 int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
 {
-	int call_cpu;
 	struct cpu_stop_done done;
 	struct cpu_stop_work work1, work2;
 	struct irq_cpu_stop_queue_work_info call_args;
-	struct multi_stop_data msdata = {
+	struct multi_stop_data msdata;
+
+	preempt_disable();
+	msdata = (struct multi_stop_data){
 		.fn = fn,
 		.data = arg,
 		.num_threads = 2,
@@ -261,17 +263,31 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 	cpu_stop_init_done(&done, 2);
 	set_state(&msdata, MULTI_STOP_PREPARE);
 
+	/*
+	 * If we observe both CPUs active we know _cpu_down() cannot yet have
+	 * queued its stop_machine works and therefore ours will get executed
+	 * first. Or its not either one of our CPUs that's getting unplugged,
+	 * in which case we don't care.
+	 *
+	 * This relies on the stopper workqueues to be FIFO.
+	 */
+	if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
+		preempt_enable();
+		return -ENOENT;
+	}
+
 	/*
 	 * Queuing needs to be done by the lowest numbered CPU, to ensure
 	 * that works are always queued in the same order on every CPU.
 	 * This prevents deadlocks.
 	 */
-	call_cpu = min(cpu1, cpu2);
-
-	smp_call_function_single(call_cpu, &irq_cpu_stop_queue_work,
+	smp_call_function_single(min(cpu1, cpu2),
+				 &irq_cpu_stop_queue_work,
 				 &call_args, 0);
+	preempt_enable();
 
 	wait_for_completion(&done.completion);
+
 	return done.executed ? done.ret : -ENOENT;
 }
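Editor's annotation: with this change stop_two_cpus() can return -ENOENT before queueing anything when either CPU is not active, in addition to the existing "stopper did not execute" case; callers such as migrate_swap() simply propagate that. A hypothetical caller might look like this (illustrative names only):

#include <linux/stop_machine.h>

/* Hypothetical stopper callback: invoked once both CPUs are held in
 * the multi-stop state machine.
 */
static int example_pair_fn(void *arg)
{
        /* do the paired update while both CPUs are captured */
        return 0;
}

static int example_pair_update(unsigned int cpu1, unsigned int cpu2)
{
        int ret;

        ret = stop_two_cpus(cpu1, cpu2, example_pair_fn, NULL);
        /*
         * -ENOENT now also means "one of the CPUs was not active"; in
         * that case nothing was queued, so the caller can simply give
         * up or fall back to a single-CPU path.
         */
        return ret;
}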
