Skip to content

Commit f9f9e7b

Browse files
committed
Revert "cgroup: simplify threadgroup locking"
This reverts commit b5ba75b.

d59cfc0 ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") and b5ba75b ("cgroup: simplify threadgroup locking") changed how cgroup synchronizes against task fork and exits so that it uses global percpu_rwsem instead of per-process rwsem; unfortunately, the write [un]lock paths of percpu_rwsem always involve synchronize_rcu_expedited() which turned out to be too expensive.

Improvements for percpu_rwsem, which will alleviate this issue, are scheduled to be merged in the coming v4.4-rc1 merge window. For now, revert the two commits to restore per-process rwsem. They will be re-applied for the v4.4-rc1 merge window.

Signed-off-by: Tejun Heo <[email protected]>
Link: http://lkml.kernel.org/g/[email protected]
Reported-by: Christian Borntraeger <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: [email protected] # v4.2+
1 parent 6ff33f3 commit f9f9e7b

File tree

1 file changed

+33
-12
lines changed

1 file changed

+33
-12
lines changed

kernel/cgroup.c

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2460,13 +2460,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
24602460
if (!cgrp)
24612461
return -ENODEV;
24622462

2463-
percpu_down_write(&cgroup_threadgroup_rwsem);
2463+
retry_find_task:
24642464
rcu_read_lock();
24652465
if (pid) {
24662466
tsk = find_task_by_vpid(pid);
24672467
if (!tsk) {
2468+
rcu_read_unlock();
24682469
ret = -ESRCH;
2469-
goto out_unlock_rcu;
2470+
goto out_unlock_cgroup;
24702471
}
24712472
} else {
24722473
tsk = current;
@@ -2482,23 +2483,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
24822483
*/
24832484
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
24842485
ret = -EINVAL;
2485-
goto out_unlock_rcu;
2486+
rcu_read_unlock();
2487+
goto out_unlock_cgroup;
24862488
}
24872489

24882490
get_task_struct(tsk);
24892491
rcu_read_unlock();
24902492

2493+
percpu_down_write(&cgroup_threadgroup_rwsem);
2494+
if (threadgroup) {
2495+
if (!thread_group_leader(tsk)) {
2496+
/*
2497+
* a race with de_thread from another thread's exec()
2498+
* may strip us of our leadership, if this happens,
2499+
* there is no choice but to throw this task away and
2500+
* try again; this is
2501+
* "double-double-toil-and-trouble-check locking".
2502+
*/
2503+
percpu_up_write(&cgroup_threadgroup_rwsem);
2504+
put_task_struct(tsk);
2505+
goto retry_find_task;
2506+
}
2507+
}
2508+
24912509
ret = cgroup_procs_write_permission(tsk, cgrp, of);
24922510
if (!ret)
24932511
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
24942512

2495-
put_task_struct(tsk);
2496-
goto out_unlock_threadgroup;
2497-
2498-
out_unlock_rcu:
2499-
rcu_read_unlock();
2500-
out_unlock_threadgroup:
25012513
percpu_up_write(&cgroup_threadgroup_rwsem);
2514+
2515+
put_task_struct(tsk);
2516+
out_unlock_cgroup:
25022517
cgroup_kn_unlock(of->kn);
25032518
return ret ?: nbytes;
25042519
}
@@ -2643,8 +2658,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
26432658

26442659
lockdep_assert_held(&cgroup_mutex);
26452660

2646-
percpu_down_write(&cgroup_threadgroup_rwsem);
2647-
26482661
/* look up all csses currently attached to @cgrp's subtree */
26492662
down_read(&css_set_rwsem);
26502663
css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2700,8 +2713,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
27002713
goto out_finish;
27012714
last_task = task;
27022715

2716+
percpu_down_write(&cgroup_threadgroup_rwsem);
2717+
/* raced against de_thread() from another thread? */
2718+
if (!thread_group_leader(task)) {
2719+
percpu_up_write(&cgroup_threadgroup_rwsem);
2720+
put_task_struct(task);
2721+
continue;
2722+
}
2723+
27032724
ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
27042725

2726+
percpu_up_write(&cgroup_threadgroup_rwsem);
27052727
put_task_struct(task);
27062728

27072729
if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2711,7 +2733,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
27112733

27122734
out_finish:
27132735
cgroup_migrate_finish(&preloaded_csets);
2714-
percpu_up_write(&cgroup_threadgroup_rwsem);
27152736
return ret;
27162737
}
27172738

0 commit comments

Comments
 (0)