Skip to content

Commit bf4f059

Browse files
rgushchin authored and torvalds committed
mm: memcg/slab: obj_cgroup API
Obj_cgroup API provides an ability to account sub-page sized kernel objects, which potentially outlive the original memory cgroup.

The top-level API consists of the following functions:

  bool obj_cgroup_tryget(struct obj_cgroup *objcg);
  void obj_cgroup_get(struct obj_cgroup *objcg);
  void obj_cgroup_put(struct obj_cgroup *objcg);

  int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
  void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);

  struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg);
  struct obj_cgroup *get_obj_cgroup_from_current(void);

Object cgroup is basically a pointer to a memory cgroup with a per-cpu reference counter. It substitutes a memory cgroup in places where it's necessary to charge a custom amount of bytes instead of pages. All charged memory rounded down to pages is charged to the corresponding memory cgroup using __memcg_kmem_charge().

It implements reparenting: on memcg offlining it's getting reattached to the parent memory cgroup. Each online memory cgroup has an associated active object cgroup to handle new allocations and the list of all attached object cgroups. On offlining of a cgroup this list is reparented and for each object cgroup in the list the memcg pointer is swapped to the parent memory cgroup. It prevents long-living objects from pinning the original memory cgroup in the memory.

The implementation is based on byte-sized per-cpu stocks. A sub-page sized leftover is stored in an atomic field, which is a part of obj_cgroup object. So on cgroup offlining the leftover is automatically reparented.

memcg->objcg is rcu protected. objcg->memcg is a raw pointer, which is always pointing at a memory cgroup, but can be atomically swapped to the parent memory cgroup. So a user must ensure the lifetime of the cgroup, e.g. grab rcu_read_lock or css_set_lock.
Suggested-by: Johannes Weiner <[email protected]> Signed-off-by: Roman Gushchin <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Reviewed-by: Shakeel Butt <[email protected]> Cc: Christoph Lameter <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Tejun Heo <[email protected]> Cc: Vlastimil Babka <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
1 parent 1a3e1f4 commit bf4f059

File tree

2 files changed

+338
-1
lines changed

2 files changed

+338
-1
lines changed

include/linux/memcontrol.h

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <linux/page-flags.h>
2424

2525
struct mem_cgroup;
26+
struct obj_cgroup;
2627
struct page;
2728
struct mm_struct;
2829
struct kmem_cache;
@@ -192,6 +193,22 @@ struct memcg_cgwb_frn {
192193
struct wb_completion done; /* tracks in-flight foreign writebacks */
193194
};
194195

196+
/*
197+
* Bucket for arbitrarily byte-sized objects charged to a memory
198+
* cgroup. The bucket can be reparented in one piece when the cgroup
199+
* is destroyed, without having to round up the individual references
200+
* of all live memory objects in the wild.
201+
*/
202+
struct obj_cgroup {
203+
struct percpu_ref refcnt;
204+
struct mem_cgroup *memcg;
205+
atomic_t nr_charged_bytes;
206+
union {
207+
struct list_head list;
208+
struct rcu_head rcu;
209+
};
210+
};
211+
195212
/*
196213
* The memory controller data structure. The memory controller controls both
197214
* page cache and RSS per cgroup. We would eventually like to provide
@@ -301,6 +318,8 @@ struct mem_cgroup {
301318
int kmemcg_id;
302319
enum memcg_kmem_state kmem_state;
303320
struct list_head kmem_caches;
321+
struct obj_cgroup __rcu *objcg;
322+
struct list_head objcg_list; /* list of inherited objcgs */
304323
#endif
305324

306325
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -416,6 +435,33 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
416435
return css ? container_of(css, struct mem_cgroup, css) : NULL;
417436
}
418437

438+
static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
439+
{
440+
return percpu_ref_tryget(&objcg->refcnt);
441+
}
442+
443+
static inline void obj_cgroup_get(struct obj_cgroup *objcg)
444+
{
445+
percpu_ref_get(&objcg->refcnt);
446+
}
447+
448+
static inline void obj_cgroup_put(struct obj_cgroup *objcg)
449+
{
450+
percpu_ref_put(&objcg->refcnt);
451+
}
452+
453+
/*
454+
* After the initialization objcg->memcg is always pointing at
455+
* a valid memcg, but can be atomically swapped to the parent memcg.
456+
*
457+
* The caller must ensure that the returned memcg won't be released:
458+
* e.g. acquire the rcu_read_lock or css_set_lock.
459+
*/
460+
static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
461+
{
462+
return READ_ONCE(objcg->memcg);
463+
}
464+
419465
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
420466
{
421467
if (memcg)
@@ -1368,6 +1414,11 @@ void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
13681414
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
13691415
void __memcg_kmem_uncharge_page(struct page *page, int order);
13701416

1417+
struct obj_cgroup *get_obj_cgroup_from_current(void);
1418+
1419+
int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
1420+
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
1421+
13711422
extern struct static_key_false memcg_kmem_enabled_key;
13721423
extern struct workqueue_struct *memcg_kmem_cache_wq;
13731424

0 commit comments

Comments (0)