Skip to content

Commit 2788cf0

Browse files
Vladimir Davydov authored and
torvalds committed
memcg: reparent list_lrus and free kmemcg_id on css offline
Now, the only reason to keep kmemcg_id till css free is list_lru, which uses it to distribute elements between per-memcg lists. However, it can be easily sorted out - we only need to change kmemcg_id of an offline cgroup to its parent's id, making further list_lru_add()'s add elements to the parent's list, and then move all elements from the offline cgroup's list to the one of its parent. It will work, because a racing list_lru_del() does not need to know the list it is deleting the element from. It can decrement the wrong nr_items counter though, but the ongoing reparenting will fix it.

After list_lru reparenting is done we are free to release kmemcg_id saving a valuable slot in a per-memcg array for new cgroups.

Signed-off-by: Vladimir Davydov <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Dave Chinner <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 3f97b16 commit 2788cf0

File tree

3 files changed

+79
-9
lines changed

3 files changed

+79
-9
lines changed

include/linux/list_lru.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ enum lru_status {
2626

2727
struct list_lru_one {
2828
struct list_head list;
29-
/* kept as signed so we can catch imbalance bugs */
29+
/* may become negative during memcg reparenting */
3030
long nr_items;
3131
};
3232

@@ -62,6 +62,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
6262
#define list_lru_init_memcg(lru) __list_lru_init((lru), true, NULL)
6363

6464
int memcg_update_all_list_lrus(int num_memcgs);
65+
void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
6566

6667
/**
6768
* list_lru_add: add an element to the lru list's tail

mm/list_lru.c

Lines changed: 43 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,6 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
100100

101101
spin_lock(&nlru->lock);
102102
l = list_lru_from_kmem(nlru, item);
103-
WARN_ON_ONCE(l->nr_items < 0);
104103
if (list_empty(item)) {
105104
list_add_tail(item, &l->list);
106105
l->nr_items++;
@@ -123,7 +122,6 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
123122
if (!list_empty(item)) {
124123
list_del_init(item);
125124
l->nr_items--;
126-
WARN_ON_ONCE(l->nr_items < 0);
127125
spin_unlock(&nlru->lock);
128126
return true;
129127
}
@@ -156,7 +154,6 @@ static unsigned long __list_lru_count_one(struct list_lru *lru,
156154

157155
spin_lock(&nlru->lock);
158156
l = list_lru_from_memcg_idx(nlru, memcg_idx);
159-
WARN_ON_ONCE(l->nr_items < 0);
160157
count = l->nr_items;
161158
spin_unlock(&nlru->lock);
162159

@@ -458,6 +455,49 @@ int memcg_update_all_list_lrus(int new_size)
458455
memcg_cancel_update_list_lru(lru, old_size, new_size);
459456
goto out;
460457
}
458+
459+
static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
460+
int src_idx, int dst_idx)
461+
{
462+
struct list_lru_one *src, *dst;
463+
464+
/*
465+
* Since list_lru_{add,del} may be called under an IRQ-safe lock,
466+
* we have to use IRQ-safe primitives here to avoid deadlock.
467+
*/
468+
spin_lock_irq(&nlru->lock);
469+
470+
src = list_lru_from_memcg_idx(nlru, src_idx);
471+
dst = list_lru_from_memcg_idx(nlru, dst_idx);
472+
473+
list_splice_init(&src->list, &dst->list);
474+
dst->nr_items += src->nr_items;
475+
src->nr_items = 0;
476+
477+
spin_unlock_irq(&nlru->lock);
478+
}
479+
480+
static void memcg_drain_list_lru(struct list_lru *lru,
481+
int src_idx, int dst_idx)
482+
{
483+
int i;
484+
485+
if (!list_lru_memcg_aware(lru))
486+
return;
487+
488+
for (i = 0; i < nr_node_ids; i++)
489+
memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
490+
}
491+
492+
void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
493+
{
494+
struct list_lru *lru;
495+
496+
mutex_lock(&list_lrus_mutex);
497+
list_for_each_entry(lru, &list_lrus, list)
498+
memcg_drain_list_lru(lru, src_idx, dst_idx);
499+
mutex_unlock(&list_lrus_mutex);
500+
}
461501
#else
462502
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
463503
{

mm/memcontrol.c

Lines changed: 34 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -334,6 +334,7 @@ struct mem_cgroup {
334334
#if defined(CONFIG_MEMCG_KMEM)
335335
/* Index in the kmem_cache->memcg_params.memcg_caches array */
336336
int kmemcg_id;
337+
bool kmem_acct_activated;
337338
bool kmem_acct_active;
338339
#endif
339340

@@ -582,14 +583,10 @@ void memcg_put_cache_ids(void)
582583
struct static_key memcg_kmem_enabled_key;
583584
EXPORT_SYMBOL(memcg_kmem_enabled_key);
584585

585-
static void memcg_free_cache_id(int id);
586-
587586
static void disarm_kmem_keys(struct mem_cgroup *memcg)
588587
{
589-
if (memcg->kmemcg_id >= 0) {
588+
if (memcg->kmem_acct_activated)
590589
static_key_slow_dec(&memcg_kmem_enabled_key);
591-
memcg_free_cache_id(memcg->kmemcg_id);
592-
}
593590
/*
594591
* This check can't live in kmem destruction function,
595592
* since the charges will outlive the cgroup
@@ -3322,6 +3319,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
33223319
int memcg_id;
33233320

33243321
BUG_ON(memcg->kmemcg_id >= 0);
3322+
BUG_ON(memcg->kmem_acct_activated);
33253323
BUG_ON(memcg->kmem_acct_active);
33263324

33273325
/*
@@ -3365,6 +3363,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
33653363
* patched.
33663364
*/
33673365
memcg->kmemcg_id = memcg_id;
3366+
memcg->kmem_acct_activated = true;
33683367
memcg->kmem_acct_active = true;
33693368
out:
33703369
return err;
@@ -4047,6 +4046,10 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
40474046

40484047
static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
40494048
{
4049+
struct cgroup_subsys_state *css;
4050+
struct mem_cgroup *parent, *child;
4051+
int kmemcg_id;
4052+
40504053
if (!memcg->kmem_acct_active)
40514054
return;
40524055

@@ -4059,6 +4062,32 @@ static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
40594062
memcg->kmem_acct_active = false;
40604063

40614064
memcg_deactivate_kmem_caches(memcg);
4065+
4066+
kmemcg_id = memcg->kmemcg_id;
4067+
BUG_ON(kmemcg_id < 0);
4068+
4069+
parent = parent_mem_cgroup(memcg);
4070+
if (!parent)
4071+
parent = root_mem_cgroup;
4072+
4073+
/*
4074+
* Change kmemcg_id of this cgroup and all its descendants to the
4075+
* parent's id, and then move all entries from this cgroup's list_lrus
4076+
* to ones of the parent. After we have finished, all list_lrus
4077+
* corresponding to this cgroup are guaranteed to remain empty. The
4078+
* ordering is imposed by list_lru_node->lock taken by
4079+
* memcg_drain_all_list_lrus().
4080+
*/
4081+
css_for_each_descendant_pre(css, &memcg->css) {
4082+
child = mem_cgroup_from_css(css);
4083+
BUG_ON(child->kmemcg_id != kmemcg_id);
4084+
child->kmemcg_id = parent->kmemcg_id;
4085+
if (!memcg->use_hierarchy)
4086+
break;
4087+
}
4088+
memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id);
4089+
4090+
memcg_free_cache_id(kmemcg_id);
40624091
}
40634092

40644093
static void memcg_destroy_kmem(struct mem_cgroup *memcg)

0 commit comments

Comments
 (0)