
Commit 9855609

rgushchin authored and torvalds committed
mm: memcg/slab: use a single set of kmem_caches for all accounted allocations
This is a fairly big but mostly red patch, which makes all accounted slab allocations use a single set of kmem_caches instead of creating a separate set for each memory cgroup.

Because the number of non-root kmem_caches is now capped by the number of root kmem_caches, there is no need to shrink or destroy them prematurely. They can be destroyed together with their root counterparts. This allows us to dramatically simplify the management of non-root kmem_caches and delete a ton of code.

This patch performs the following changes:

1) introduces the memcg_params.memcg_cache pointer to represent the kmem_cache which will be used for all non-root allocations
2) reuses the existing memcg kmem_cache creation mechanism to create the memcg kmem_cache on the first allocation attempt
3) memcg kmem_caches are named <kmemcache_name>-memcg, e.g. dentry-memcg
4) simplifies memcg_kmem_get_cache() to just return the memcg kmem_cache, or schedule its creation and return the root cache
5) removes almost all non-root kmem_cache management code (separate refcounter, reparenting, shrinking, etc.)
6) makes slab debugfs display the root_mem_cgroup css id and never show the :dead and :deact flags in the memcg_slabinfo attribute

Following patches in the series will simplify the kmem_cache creation.

Signed-off-by: Roman Gushchin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Vlastimil Babka <[email protected]>
Reviewed-by: Shakeel Butt <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Tejun Heo <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
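For orientation, the entire allocation-time lookup now reduces to a single pointer read on the root cache. The sketch below restates the new memcg_kmem_get_cache() from the mm/memcontrol.c hunk further down, with explanatory comments added; it is a condensed illustration using the names introduced by this patch, not a verbatim excerpt, and the surrounding kernel declarations are elided.

/* Sketch of the new lookup path (see the mm/memcontrol.c diff below). */
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
{
        struct kmem_cache *memcg_cachep;

        /* Each root cache now carries at most one memcg counterpart. */
        memcg_cachep = READ_ONCE(cachep->memcg_params.memcg_cache);
        if (unlikely(!memcg_cachep)) {
                /*
                 * First accounted allocation: schedule asynchronous creation
                 * of the "<name>-memcg" cache and satisfy this allocation
                 * from the root cache.
                 */
                memcg_schedule_kmem_cache_create(cachep);
                return cachep;
        }
        return memcg_cachep;
}

Because there is at most one memcg cache per root cache, the old per-memcg refcounting and the memcg_kmem_put_cache() pairing become unnecessary, which is where most of the deleted code below comes from.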
1 parent 0f876e4 · commit 9855609

7 files changed: +134 -698 lines changed


include/linux/memcontrol.h

Lines changed: 1 addition & 4 deletions
@@ -317,7 +317,6 @@ struct mem_cgroup {
         /* Index in the kmem_cache->memcg_params.memcg_caches array */
         int kmemcg_id;
         enum memcg_kmem_state kmem_state;
-        struct list_head kmem_caches;
         struct obj_cgroup __rcu *objcg;
         struct list_head objcg_list; /* list of inherited objcgs */
 #endif
@@ -1404,9 +1403,7 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 }
 #endif

-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
-                                        struct obj_cgroup **objcgp);
-void memcg_kmem_put_cache(struct kmem_cache *cachep);
+struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);

 #ifdef CONFIG_MEMCG_KMEM
 int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,

include/linux/slab.h

Lines changed: 1 addition & 4 deletions
@@ -155,8 +155,7 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name,
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);

-void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *);
-void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *);
+void memcg_create_kmem_cache(struct kmem_cache *cachep);

 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -580,8 +579,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
         return __kmalloc_node(size, flags, node);
 }

-int memcg_update_all_caches(int num_memcgs);
-
 /**
  * kmalloc_array - allocate memory for an array.
  * @n: number of elements.

mm/memcontrol.c

Lines changed: 32 additions & 131 deletions
@@ -350,7 +350,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
 }

 /*
- * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
+ * This will be used as a shrinker list's index.
  * The main reason for not using cgroup id for this:
  * this works better in sparse environments, where we have a lot of memcgs,
  * but only a few kmem-limited. Or also, if we have, for instance, 200
@@ -569,20 +569,16 @@ ino_t page_cgroup_ino(struct page *page)
         unsigned long ino = 0;

         rcu_read_lock();
-        if (PageSlab(page) && !PageTail(page)) {
-                memcg = memcg_from_slab_page(page);
-        } else {
-                memcg = page->mem_cgroup;
+        memcg = page->mem_cgroup;

-                /*
-                 * The lowest bit set means that memcg isn't a valid
-                 * memcg pointer, but a obj_cgroups pointer.
-                 * In this case the page is shared and doesn't belong
-                 * to any specific memory cgroup.
-                 */
-                if ((unsigned long) memcg & 0x1UL)
-                        memcg = NULL;
-        }
+        /*
+         * The lowest bit set means that memcg isn't a valid
+         * memcg pointer, but a obj_cgroups pointer.
+         * In this case the page is shared and doesn't belong
+         * to any specific memory cgroup.
+         */
+        if ((unsigned long) memcg & 0x1UL)
+                memcg = NULL;

         while (memcg && !(memcg->css.flags & CSS_ONLINE))
                 memcg = parent_mem_cgroup(memcg);
@@ -2822,12 +2818,18 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p)
         page = virt_to_head_page(p);

         /*
-         * Slab pages don't have page->mem_cgroup set because corresponding
-         * kmem caches can be reparented during the lifetime. That's why
-         * memcg_from_slab_page() should be used instead.
+         * Slab objects are accounted individually, not per-page.
+         * Memcg membership data for each individual object is saved in
+         * the page->obj_cgroups.
          */
-        if (PageSlab(page))
-                return memcg_from_slab_page(page);
+        if (page_has_obj_cgroups(page)) {
+                struct obj_cgroup *objcg;
+                unsigned int off;
+
+                off = obj_to_index(page->slab_cache, page, p);
+                objcg = page_obj_cgroups(page)[off];
+                return obj_cgroup_memcg(objcg);
+        }

         /* All other pages use page->mem_cgroup */
         return page->mem_cgroup;
@@ -2882,9 +2884,7 @@ static int memcg_alloc_cache_id(void)
         else if (size > MEMCG_CACHES_MAX_SIZE)
                 size = MEMCG_CACHES_MAX_SIZE;

-        err = memcg_update_all_caches(size);
-        if (!err)
-                err = memcg_update_all_list_lrus(size);
+        err = memcg_update_all_list_lrus(size);
         if (!err)
                 memcg_nr_cache_ids = size;

@@ -2903,7 +2903,6 @@ static void memcg_free_cache_id(int id)
 }

 struct memcg_kmem_cache_create_work {
-        struct mem_cgroup *memcg;
         struct kmem_cache *cachep;
         struct work_struct work;
 };
@@ -2912,136 +2911,51 @@ static void memcg_kmem_cache_create_func(struct work_struct *w)
 {
         struct memcg_kmem_cache_create_work *cw =
                 container_of(w, struct memcg_kmem_cache_create_work, work);
-        struct mem_cgroup *memcg = cw->memcg;
         struct kmem_cache *cachep = cw->cachep;

-        memcg_create_kmem_cache(memcg, cachep);
+        memcg_create_kmem_cache(cachep);

-        css_put(&memcg->css);
         kfree(cw);
 }

 /*
  * Enqueue the creation of a per-memcg kmem_cache.
  */
-static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
-                                             struct kmem_cache *cachep)
+static void memcg_schedule_kmem_cache_create(struct kmem_cache *cachep)
 {
         struct memcg_kmem_cache_create_work *cw;

-        if (!css_tryget_online(&memcg->css))
-                return;
-
         cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
-        if (!cw) {
-                css_put(&memcg->css);
+        if (!cw)
                 return;
-        }

-        cw->memcg = memcg;
         cw->cachep = cachep;
         INIT_WORK(&cw->work, memcg_kmem_cache_create_func);

         queue_work(memcg_kmem_cache_wq, &cw->work);
 }

 /**
- * memcg_kmem_get_cache: select the correct per-memcg cache for allocation
+ * memcg_kmem_get_cache: select memcg or root cache for allocation
  * @cachep: the original global kmem cache
  *
  * Return the kmem_cache we're supposed to use for a slab allocation.
- * We try to use the current memcg's version of the cache.
  *
  * If the cache does not exist yet, if we are the first user of it, we
  * create it asynchronously in a workqueue and let the current allocation
  * go through with the original cache.
- *
- * This function takes a reference to the cache it returns to assure it
- * won't get destroyed while we are working with it. Once the caller is
- * done with it, memcg_kmem_put_cache() must be called to release the
- * reference.
  */
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
-                                        struct obj_cgroup **objcgp)
+struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
 {
-        struct mem_cgroup *memcg;
         struct kmem_cache *memcg_cachep;
-        struct memcg_cache_array *arr;
-        int kmemcg_id;

-        VM_BUG_ON(!is_root_cache(cachep));
-
-        if (memcg_kmem_bypass())
+        memcg_cachep = READ_ONCE(cachep->memcg_params.memcg_cache);
+        if (unlikely(!memcg_cachep)) {
+                memcg_schedule_kmem_cache_create(cachep);
                 return cachep;
-
-        rcu_read_lock();
-
-        if (unlikely(current->active_memcg))
-                memcg = current->active_memcg;
-        else
-                memcg = mem_cgroup_from_task(current);
-
-        if (!memcg || memcg == root_mem_cgroup)
-                goto out_unlock;
-
-        kmemcg_id = READ_ONCE(memcg->kmemcg_id);
-        if (kmemcg_id < 0)
-                goto out_unlock;
-
-        arr = rcu_dereference(cachep->memcg_params.memcg_caches);
-
-        /*
-         * Make sure we will access the up-to-date value. The code updating
-         * memcg_caches issues a write barrier to match the data dependency
-         * barrier inside READ_ONCE() (see memcg_create_kmem_cache()).
-         */
-        memcg_cachep = READ_ONCE(arr->entries[kmemcg_id]);
-
-        /*
-         * If we are in a safe context (can wait, and not in interrupt
-         * context), we could be be predictable and return right away.
-         * This would guarantee that the allocation being performed
-         * already belongs in the new cache.
-         *
-         * However, there are some clashes that can arrive from locking.
-         * For instance, because we acquire the slab_mutex while doing
-         * memcg_create_kmem_cache, this means no further allocation
-         * could happen with the slab_mutex held. So it's better to
-         * defer everything.
-         *
-         * If the memcg is dying or memcg_cache is about to be released,
-         * don't bother creating new kmem_caches. Because memcg_cachep
-         * is ZEROed as the fist step of kmem offlining, we don't need
-         * percpu_ref_tryget_live() here. css_tryget_online() check in
-         * memcg_schedule_kmem_cache_create() will prevent us from
-         * creation of a new kmem_cache.
-         */
-        if (unlikely(!memcg_cachep))
-                memcg_schedule_kmem_cache_create(memcg, cachep);
-        else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt)) {
-                struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
-
-                if (!objcg || !obj_cgroup_tryget(objcg)) {
-                        percpu_ref_put(&memcg_cachep->memcg_params.refcnt);
-                        goto out_unlock;
-                }
-
-                *objcgp = objcg;
-                cachep = memcg_cachep;
         }
-out_unlock:
-        rcu_read_unlock();
-        return cachep;
-}

-/**
- * memcg_kmem_put_cache: drop reference taken by memcg_kmem_get_cache
- * @cachep: the cache returned by memcg_kmem_get_cache
- */
-void memcg_kmem_put_cache(struct kmem_cache *cachep)
-{
-        if (!is_root_cache(cachep))
-                percpu_ref_put(&cachep->memcg_params.refcnt);
+        return memcg_cachep;
 }

 /**
@@ -3731,7 +3645,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
          */
         memcg->kmemcg_id = memcg_id;
         memcg->kmem_state = KMEM_ONLINE;
-        INIT_LIST_HEAD(&memcg->kmem_caches);

         return 0;
 }
@@ -3744,22 +3657,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)

         if (memcg->kmem_state != KMEM_ONLINE)
                 return;
-        /*
-         * Clear the online state before clearing memcg_caches array
-         * entries. The slab_mutex in memcg_deactivate_kmem_caches()
-         * guarantees that no cache will be created for this cgroup
-         * after we are done (see memcg_create_kmem_cache()).
-         */
+
         memcg->kmem_state = KMEM_ALLOCATED;

         parent = parent_mem_cgroup(memcg);
         if (!parent)
                 parent = root_mem_cgroup;

-        /*
-         * Deactivate and reparent kmem_caches and objcgs.
-         */
-        memcg_deactivate_kmem_caches(memcg, parent);
         memcg_reparent_objcgs(memcg, parent);

         kmemcg_id = memcg->kmemcg_id;
@@ -5384,9 +5288,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)

         /* The following stuff does not apply to the root */
         if (!parent) {
-#ifdef CONFIG_MEMCG_KMEM
-                INIT_LIST_HEAD(&memcg->kmem_caches);
-#endif
                 root_mem_cgroup = memcg;
                 return &memcg->css;
         }

mm/slab.c

Lines changed: 3 additions & 13 deletions
@@ -1249,7 +1249,7 @@ void __init kmem_cache_init(void)
                                   nr_node_ids * sizeof(struct kmem_cache_node *),
                                   SLAB_HWCACHE_ALIGN, 0, 0);
         list_add(&kmem_cache->list, &slab_caches);
-        memcg_link_cache(kmem_cache, NULL);
+        memcg_link_cache(kmem_cache);
         slab_state = PARTIAL;

         /*
@@ -2253,17 +2253,6 @@ int __kmem_cache_shrink(struct kmem_cache *cachep)
         return (ret ? 1 : 0);
 }

-#ifdef CONFIG_MEMCG
-void __kmemcg_cache_deactivate(struct kmem_cache *cachep)
-{
-        __kmem_cache_shrink(cachep);
-}
-
-void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
-{
-}
-#endif
-
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
         return __kmem_cache_shrink(cachep);
@@ -3872,7 +3861,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                 return ret;

         lockdep_assert_held(&slab_mutex);
-        for_each_memcg_cache(c, cachep) {
+        c = memcg_cache(cachep);
+        if (c) {
                 /* return value determined by the root cache only */
                 __do_tune_cpucache(c, limit, batchcount, shared, gfp);
         }