Skip to content

Commit 763cfc8

Browse files
committed
Merge branch 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo: "Two patches to fix a deadlock which can be easily triggered if memcg charge moving is used. This bug was introduced while converting threadgroup locking to a global percpu_rwsem and is caused by cgroup controller task migration path depending on the ability to create new kthreads. cpuset had a similar issue which was fixed by performing heavy-lifting operations asynchronous to task migration. The two patches fix the same issue in memcg in a similar way. The first patch makes the mechanism generic and the second relocates memcg charge moving outside the migration path. Given that we don't want to perform heavy operations while writelocking threadgroup lock anyway, moving them out of the way is a desirable solution. One thing to note is that the problem was difficult to debug because lockdep couldn't figure out the deadlock condition. Looking into how to improve that" * 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: memcg: relocate charge moving from ->attach to ->post_attach cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback
2 parents 3118e5f + 264a0ae commit 763cfc8

File tree

5 files changed

+27
-28
lines changed

5 files changed

+27
-28
lines changed

include/linux/cgroup-defs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ struct cgroup_subsys {
444444
int (*can_attach)(struct cgroup_taskset *tset);
445445
void (*cancel_attach)(struct cgroup_taskset *tset);
446446
void (*attach)(struct cgroup_taskset *tset);
447+
void (*post_attach)(void);
447448
int (*can_fork)(struct task_struct *task);
448449
void (*cancel_fork)(struct task_struct *task);
449450
void (*fork)(struct task_struct *task);

include/linux/cpuset.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
137137
task_unlock(current);
138138
}
139139

140-
extern void cpuset_post_attach_flush(void);
141-
142140
#else /* !CONFIG_CPUSETS */
143141

144142
static inline bool cpusets_enabled(void) { return false; }
@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
245243
return false;
246244
}
247245

248-
static inline void cpuset_post_attach_flush(void)
249-
{
250-
}
251-
252246
#endif /* !CONFIG_CPUSETS */
253247

254248
#endif /* _LINUX_CPUSET_H */

kernel/cgroup.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
28252825
size_t nbytes, loff_t off, bool threadgroup)
28262826
{
28272827
struct task_struct *tsk;
2828+
struct cgroup_subsys *ss;
28282829
struct cgroup *cgrp;
28292830
pid_t pid;
2830-
int ret;
2831+
int ssid, ret;
28312832

28322833
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
28332834
return -EINVAL;
@@ -2875,8 +2876,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
28752876
rcu_read_unlock();
28762877
out_unlock_threadgroup:
28772878
percpu_up_write(&cgroup_threadgroup_rwsem);
2879+
for_each_subsys(ss, ssid)
2880+
if (ss->post_attach)
2881+
ss->post_attach();
28782882
cgroup_kn_unlock(of->kn);
2879-
cpuset_post_attach_flush();
28802883
return ret ?: nbytes;
28812884
}
28822885

kernel/cpuset.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
#include <asm/uaccess.h>
5959
#include <linux/atomic.h>
6060
#include <linux/mutex.h>
61-
#include <linux/workqueue.h>
6261
#include <linux/cgroup.h>
6362
#include <linux/wait.h>
6463

@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
10161015
}
10171016
}
10181017

1019-
void cpuset_post_attach_flush(void)
1018+
static void cpuset_post_attach(void)
10201019
{
10211020
flush_workqueue(cpuset_migrate_mm_wq);
10221021
}
@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
20872086
.can_attach = cpuset_can_attach,
20882087
.cancel_attach = cpuset_cancel_attach,
20892088
.attach = cpuset_attach,
2089+
.post_attach = cpuset_post_attach,
20902090
.bind = cpuset_bind,
20912091
.legacy_cftypes = files,
20922092
.early_init = true,

mm/memcontrol.c

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
207207
/* "mc" and its members are protected by cgroup_mutex */
208208
static struct move_charge_struct {
209209
spinlock_t lock; /* for from, to */
210+
struct mm_struct *mm;
210211
struct mem_cgroup *from;
211212
struct mem_cgroup *to;
212213
unsigned long flags;
@@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)
46674668

46684669
static void mem_cgroup_clear_mc(void)
46694670
{
4671+
struct mm_struct *mm = mc.mm;
4672+
46704673
/*
46714674
* we must clear moving_task before waking up waiters at the end of
46724675
* task migration.
@@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
46764679
spin_lock(&mc.lock);
46774680
mc.from = NULL;
46784681
mc.to = NULL;
4682+
mc.mm = NULL;
46794683
spin_unlock(&mc.lock);
4684+
4685+
mmput(mm);
46804686
}
46814687

46824688
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
47334739
VM_BUG_ON(mc.moved_swap);
47344740

47354741
spin_lock(&mc.lock);
4742+
mc.mm = mm;
47364743
mc.from = from;
47374744
mc.to = memcg;
47384745
mc.flags = move_flags;
@@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
47424749
ret = mem_cgroup_precharge_mc(mm);
47434750
if (ret)
47444751
mem_cgroup_clear_mc();
4752+
} else {
4753+
mmput(mm);
47454754
}
4746-
mmput(mm);
47474755
return ret;
47484756
}
47494757

@@ -4852,11 +4860,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
48524860
return ret;
48534861
}
48544862

4855-
static void mem_cgroup_move_charge(struct mm_struct *mm)
4863+
static void mem_cgroup_move_charge(void)
48564864
{
48574865
struct mm_walk mem_cgroup_move_charge_walk = {
48584866
.pmd_entry = mem_cgroup_move_charge_pte_range,
4859-
.mm = mm,
4867+
.mm = mc.mm,
48604868
};
48614869

48624870
lru_add_drain_all();
@@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
48684876
atomic_inc(&mc.from->moving_account);
48694877
synchronize_rcu();
48704878
retry:
4871-
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
4879+
if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
48724880
/*
48734881
* Someone who are holding the mmap_sem might be waiting in
48744882
* waitq. So we cancel all extra charges, wake up all waiters,
@@ -4885,23 +4893,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
48854893
* additional charge, the page walk just aborts.
48864894
*/
48874895
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
4888-
up_read(&mm->mmap_sem);
4896+
up_read(&mc.mm->mmap_sem);
48894897
atomic_dec(&mc.from->moving_account);
48904898
}
48914899

4892-
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
4900+
static void mem_cgroup_move_task(void)
48934901
{
4894-
struct cgroup_subsys_state *css;
4895-
struct task_struct *p = cgroup_taskset_first(tset, &css);
4896-
struct mm_struct *mm = get_task_mm(p);
4897-
4898-
if (mm) {
4899-
if (mc.to)
4900-
mem_cgroup_move_charge(mm);
4901-
mmput(mm);
4902-
}
4903-
if (mc.to)
4902+
if (mc.to) {
4903+
mem_cgroup_move_charge();
49044904
mem_cgroup_clear_mc();
4905+
}
49054906
}
49064907
#else /* !CONFIG_MMU */
49074908
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
49114912
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
49124913
{
49134914
}
4914-
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
4915+
static void mem_cgroup_move_task(void)
49154916
{
49164917
}
49174918
#endif
@@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
51955196
.css_reset = mem_cgroup_css_reset,
51965197
.can_attach = mem_cgroup_can_attach,
51975198
.cancel_attach = mem_cgroup_cancel_attach,
5198-
.attach = mem_cgroup_move_task,
5199+
.post_attach = mem_cgroup_move_task,
51995200
.bind = mem_cgroup_bind,
52005201
.dfl_cftypes = memory_files,
52015202
.legacy_cftypes = mem_cgroup_legacy_files,

0 commit comments

Comments
 (0)