Commit db2ba40

Johannes Weiner authored and torvalds committed
mm: memcontrol: make per-cpu charge cache IRQ-safe for socket accounting
During cgroup2 rollout into production, we started encountering css refcount underflows and css access crashes in the memory controller. Splitting the heavily shared css reference counter into logical users narrowed the imbalance down to the cgroup2 socket memory accounting.

The problem turns out to be the per-cpu charge cache. Cgroup1 had a separate socket counter, but the new cgroup2 socket accounting goes through the common charge path that uses a shared per-cpu cache for all memory that is being tracked. Those caches are safe against scheduling preemption, but not against interrupts - such as the newly added packet receive path. When cache draining is interrupted by network RX taking pages out of the cache, the resuming drain operation will put the references of in-use pages, thus causing the imbalance.

Disable IRQs during all per-cpu charge cache operations.

Fixes: f7e1cb6 ("mm: memcontrol: account socket memory in unified hierarchy memory controller")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: Tejun Heo <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: <[email protected]> [4.5+]
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
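To illustrate the message above: get_cpu_var() only disables preemption, which keeps other tasks off the per-cpu data but not interrupt handlers running on the same CPU. Below is a minimal kernel-style C sketch of the two patterns, with the memcg_stock_pcp structure trimmed down for brevity - a simplified model, not a verbatim excerpt from mm/memcontrol.c:

	struct memcg_stock_pcp {		/* simplified */
		struct mem_cgroup *cached;
		unsigned int nr_pages;
	};
	static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);

	struct memcg_stock_pcp *stock;
	unsigned long flags;

	/* Before: preemption-safe only. */
	stock = &get_cpu_var(memcg_stock);	/* preempt_disable() */
	/* A network RX interrupt can fire here, enter consume_stock(),
	 * and take pages out of the very cache we are draining. */
	put_cpu_var(memcg_stock);		/* preempt_enable() */

	/* After: IRQ-safe (and therefore also preemption-safe). */
	local_irq_save(flags);
	stock = this_cpu_ptr(&memcg_stock);
	/* ...manipulate the cache; local IRQs cannot interleave... */
	local_irq_restore(flags);

With interrupts masked on the local CPU, neither the scheduler nor an interrupt handler can run between the cache lookup and the update, which is why the get_cpu_var()/put_cpu_var() pair can be dropped entirely.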
1 parent 3bb8b65 commit db2ba40

File tree

1 file changed (+22, -9)

mm/memcontrol.c

Lines changed: 22 additions & 9 deletions
@@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
 static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct memcg_stock_pcp *stock;
+	unsigned long flags;
 	bool ret = false;
 
 	if (nr_pages > CHARGE_BATCH)
 		return ret;
 
-	stock = &get_cpu_var(memcg_stock);
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
 		stock->nr_pages -= nr_pages;
 		ret = true;
 	}
-	put_cpu_var(memcg_stock);
+
+	local_irq_restore(flags);
+
 	return ret;
 }
 
@@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 	stock->cached = NULL;
 }
 
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
 static void drain_local_stock(struct work_struct *dummy)
 {
-	struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+	struct memcg_stock_pcp *stock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+	local_irq_restore(flags);
 }
 
 /*
@@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy)
  */
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+	struct memcg_stock_pcp *stock;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
+	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached != memcg) { /* reset if necessary */
 		drain_stock(stock);
 		stock->cached = memcg;
 	}
 	stock->nr_pages += nr_pages;
-	put_cpu_var(memcg_stock);
+
+	local_irq_restore(flags);
 }
 
 /*
