
Commit 9ce70c0

Hugh Dickins authored and torvalds committed
memcg: fix deadlock by inverting lrucare nesting
We have forgotten the rules of lock nesting: the irq-safe ones must be
taken inside the non-irq-safe ones, otherwise we are open to deadlock:

       CPU0                          CPU1
       ----                          ----
  lock(&(&pc->lock)->rlock);
                                local_irq_disable();
                                lock(&(&zone->lru_lock)->rlock);
                                lock(&(&pc->lock)->rlock);
  <Interrupt>
    lock(&(&zone->lru_lock)->rlock);

To check a different locking issue, I happened to add a spin_lock to
memcg's bit_spin_lock in lock_page_cgroup(), and lockdep very quickly
complained about __mem_cgroup_commit_charge_lrucare() (on CPU1 above).

So delete __mem_cgroup_commit_charge_lrucare(), passing a bool lrucare
to __mem_cgroup_commit_charge() instead, taking zone->lru_lock under
lock_page_cgroup() in the lrucare case.

The original was using spin_lock_irqsave, but we'd be in more trouble
if it were ever called at interrupt time: unconditional _irq is enough.
And ClearPageLRU before del from lru, SetPageLRU before add to lru: no
strong reason, but that is the ordering used consistently elsewhere.

Fixes 36b62ad ("memcg: simplify corner case handling of LRU").

Signed-off-by: Hugh Dickins <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Konstantin Khlebnikov <[email protected]>
Acked-by: KAMEZAWA Hiroyuki <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
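For readers less familiar with the pattern, the deadlock above is the classic AB-BA inversion, with the interrupt acting as a second locking path on CPU0. Below is a minimal standalone sketch of the same inversion, assuming only POSIX threads: pc_lock and lru_lock are illustrative stand-ins for the page_cgroup bit-spinlock and zone->lru_lock, and plain mutexes replace the irq-disabling spinlocks, so the bug shows up as a hang rather than a lockdep report.

#include <pthread.h>

static pthread_mutex_t pc_lock  = PTHREAD_MUTEX_INITIALIZER;  /* stand-in for lock_page_cgroup() */
static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;  /* stand-in for zone->lru_lock */

/* CPU0's path: pc_lock first, then (via the "interrupt") lru_lock. */
static void *cpu0(void *arg)
{
        (void)arg;
        for (;;) {
                pthread_mutex_lock(&pc_lock);
                pthread_mutex_lock(&lru_lock);  /* waits if cpu1 holds lru_lock */
                pthread_mutex_unlock(&lru_lock);
                pthread_mutex_unlock(&pc_lock);
        }
}

/* CPU1's path: the old lrucare ordering, lru_lock first, then pc_lock. */
static void *cpu1(void *arg)
{
        (void)arg;
        for (;;) {
                pthread_mutex_lock(&lru_lock);
                pthread_mutex_lock(&pc_lock);   /* waits if cpu0 holds pc_lock */
                pthread_mutex_unlock(&pc_lock);
                pthread_mutex_unlock(&lru_lock);
        }
}

int main(void)
{
        pthread_t t0, t1;

        pthread_create(&t0, NULL, cpu0, NULL);
        pthread_create(&t1, NULL, cpu1, NULL);
        pthread_join(t0, NULL); /* never returns once the threads wedge */
        return 0;
}

Run long enough, each thread eventually holds one lock while waiting forever on the other. The commit removes the second ordering entirely: after the change, every path takes lock_page_cgroup() before zone->lru_lock, so only one lock order exists.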
1 parent 73737b8 commit 9ce70c0

File tree

1 file changed: +37 -35 lines changed


mm/memcontrol.c

Lines changed: 37 additions & 35 deletions
@@ -2408,8 +2408,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                                        struct page *page,
                                        unsigned int nr_pages,
                                        struct page_cgroup *pc,
-                                       enum charge_type ctype)
+                                       enum charge_type ctype,
+                                       bool lrucare)
 {
+        struct zone *uninitialized_var(zone);
+        bool was_on_lru = false;
+
         lock_page_cgroup(pc);
         if (unlikely(PageCgroupUsed(pc))) {
                 unlock_page_cgroup(pc);
@@ -2420,6 +2424,21 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
          * we don't need page_cgroup_lock about tail pages, becase they are not
          * accessed by any other context at this point.
          */
+
+        /*
+         * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
+         * may already be on some other mem_cgroup's LRU. Take care of it.
+         */
+        if (lrucare) {
+                zone = page_zone(page);
+                spin_lock_irq(&zone->lru_lock);
+                if (PageLRU(page)) {
+                        ClearPageLRU(page);
+                        del_page_from_lru_list(zone, page, page_lru(page));
+                        was_on_lru = true;
+                }
+        }
+
         pc->mem_cgroup = memcg;
         /*
          * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2443,9 +2462,18 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                 break;
         }
 
+        if (lrucare) {
+                if (was_on_lru) {
+                        VM_BUG_ON(PageLRU(page));
+                        SetPageLRU(page);
+                        add_page_to_lru_list(zone, page, page_lru(page));
+                }
+                spin_unlock_irq(&zone->lru_lock);
+        }
+
         mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
         unlock_page_cgroup(pc);
-        WARN_ON_ONCE(PageLRU(page));
+
         /*
          * "charge_statistics" updated event counter. Then, check it.
          * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2643,7 +2671,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
         ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
         if (ret == -ENOMEM)
                 return ret;
-        __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
+        __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype, false);
         return 0;
 }
 
@@ -2663,35 +2691,6 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
                                   enum charge_type ctype);
 
-static void
-__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
-                                        enum charge_type ctype)
-{
-        struct page_cgroup *pc = lookup_page_cgroup(page);
-        struct zone *zone = page_zone(page);
-        unsigned long flags;
-        bool removed = false;
-
-        /*
-         * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
-         * is already on LRU. It means the page may on some other page_cgroup's
-         * LRU. Take care of it.
-         */
-        spin_lock_irqsave(&zone->lru_lock, flags);
-        if (PageLRU(page)) {
-                del_page_from_lru_list(zone, page, page_lru(page));
-                ClearPageLRU(page);
-                removed = true;
-        }
-        __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
-        if (removed) {
-                add_page_to_lru_list(zone, page, page_lru(page));
-                SetPageLRU(page);
-        }
-        spin_unlock_irqrestore(&zone->lru_lock, flags);
-        return;
-}
-
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                             gfp_t gfp_mask)
 {
@@ -2769,13 +2768,16 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
                                   enum charge_type ctype)
 {
+        struct page_cgroup *pc;
+
         if (mem_cgroup_disabled())
                 return;
         if (!memcg)
                 return;
         cgroup_exclude_rmdir(&memcg->css);
 
-        __mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
+        pc = lookup_page_cgroup(page);
+        __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype, true);
         /*
          * Now swap is on-memory. This means this page may be
          * counted both as mem and swap....double count.
@@ -3248,7 +3250,7 @@ int mem_cgroup_prepare_migration(struct page *page,
                 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
         else
                 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-        __mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype);
+        __mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype, false);
         return ret;
 }
 
@@ -3332,7 +3334,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
          * the newpage may be on LRU(or pagevec for LRU) already. We lock
          * LRU while we overwrite pc->mem_cgroup.
          */
-        __mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
+        __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type, true);
 }
 
 #ifdef CONFIG_DEBUG_VM
