Skip to content

Commit 1aacbd3

Browse files
rgushchin authored and
akpm00 committed
mm: kmem: add direct objcg pointer to task_struct
To charge a freshly allocated kernel object to a memory cgroup, the kernel needs to obtain an objcg pointer. Currently it does it indirectly by obtaining the memcg pointer first and then calling __get_obj_cgroup_from_memcg(). Usually tasks spend their entire life belonging to the same object cgroup. So it makes sense to save the objcg pointer on task_struct directly, so it can be obtained faster. It requires some work on the fork, exit and cgroup migration paths, but these paths are way colder. To avoid any costly synchronization the following rules are applied: 1) A task sets its objcg pointer itself. 2) If a task is being migrated to another cgroup, the least significant bit of the objcg pointer is set atomically. 3) On the allocation path the objcg pointer is obtained locklessly using the READ_ONCE() macro and the least significant bit is checked. If it's set, the following procedure is used to update it locklessly: - task->objcg is zeroed using cmpxchg - a new objcg pointer is obtained - task->objcg is updated using try_cmpxchg - the operation is repeated if try_cmpxchg fails. This guarantees that no updates will be lost if task migration is racing against an objcg pointer update. It also allows keeping both the read and write paths fully lockless. Because the task keeps a reference to the objcg, it can't go away while the task is alive. This commit doesn't change the way remote memcg charging works. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Roman Gushchin (Cruise) <[email protected]> Tested-by: Naresh Kamboju <[email protected]> Acked-by: Johannes Weiner <[email protected]> Acked-by: Shakeel Butt <[email protected]> Reviewed-by: Vlastimil Babka <[email protected]> Cc: David Rientjes <[email protected]> Cc: Dennis Zhou <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Muchun Song <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 7d0715d commit 1aacbd3

File tree

2 files changed

+134
-9
lines changed

2 files changed

+134
-9
lines changed

include/linux/sched.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,10 @@ struct task_struct {
14431443
struct mem_cgroup *active_memcg;
14441444
#endif
14451445

1446+
#ifdef CONFIG_MEMCG_KMEM
1447+
struct obj_cgroup *objcg;
1448+
#endif
1449+
14461450
#ifdef CONFIG_BLK_CGROUP
14471451
struct gendisk *throttle_disk;
14481452
#endif

mm/memcontrol.c

Lines changed: 130 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
249249
return container_of(vmpr, struct mem_cgroup, vmpressure);
250250
}
251251

252+
#define CURRENT_OBJCG_UPDATE_BIT 0
253+
#define CURRENT_OBJCG_UPDATE_FLAG (1UL << CURRENT_OBJCG_UPDATE_BIT)
254+
252255
#ifdef CONFIG_MEMCG_KMEM
253256
static DEFINE_SPINLOCK(objcg_lock);
254257

@@ -3083,26 +3086,85 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
30833086
return objcg;
30843087
}
30853088

3089+
static struct obj_cgroup *current_objcg_update(void)
3090+
{
3091+
struct mem_cgroup *memcg;
3092+
struct obj_cgroup *old, *objcg = NULL;
3093+
3094+
do {
3095+
/* Atomically drop the update bit. */
3096+
old = xchg(&current->objcg, NULL);
3097+
if (old) {
3098+
old = (struct obj_cgroup *)
3099+
((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
3100+
if (old)
3101+
obj_cgroup_put(old);
3102+
3103+
old = NULL;
3104+
}
3105+
3106+
/* If new objcg is NULL, no reason for the second atomic update. */
3107+
if (!current->mm || (current->flags & PF_KTHREAD))
3108+
return NULL;
3109+
3110+
/*
3111+
* Release the objcg pointer from the previous iteration,
3112+
* if try_cmpxchg() below fails.
3113+
*/
3114+
if (unlikely(objcg)) {
3115+
obj_cgroup_put(objcg);
3116+
objcg = NULL;
3117+
}
3118+
3119+
/*
3120+
* Obtain the new objcg pointer. The current task can be
3121+
* asynchronously moved to another memcg and the previous
3122+
* memcg can be offlined. So let's get the memcg pointer
3123+
* and try to get a reference to the objcg under an RCU read lock.
3124+
*/
3125+
3126+
rcu_read_lock();
3127+
memcg = mem_cgroup_from_task(current);
3128+
objcg = __get_obj_cgroup_from_memcg(memcg);
3129+
rcu_read_unlock();
3130+
3131+
/*
3132+
* Try to set up a new objcg pointer atomically. If it
3133+
* fails, it means the update flag was set concurrently, so
3134+
* the whole procedure should be repeated.
3135+
*/
3136+
} while (!try_cmpxchg(&current->objcg, &old, objcg));
3137+
3138+
return objcg;
3139+
}
3140+
30863141
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
30873142
{
30883143
struct mem_cgroup *memcg;
30893144
struct obj_cgroup *objcg;
30903145

30913146
if (in_task()) {
30923147
memcg = current->active_memcg;
3148+
if (unlikely(memcg))
3149+
goto from_memcg;
30933150

3094-
/* Memcg to charge can't be determined. */
3095-
if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
3096-
return NULL;
3151+
objcg = READ_ONCE(current->objcg);
3152+
if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
3153+
objcg = current_objcg_update();
3154+
3155+
if (objcg) {
3156+
obj_cgroup_get(objcg);
3157+
return objcg;
3158+
}
30973159
} else {
30983160
memcg = this_cpu_read(int_active_memcg);
3099-
if (likely(!memcg))
3100-
return NULL;
3161+
if (unlikely(memcg))
3162+
goto from_memcg;
31013163
}
3164+
return NULL;
31023165

3166+
from_memcg:
31033167
rcu_read_lock();
3104-
if (!memcg)
3105-
memcg = mem_cgroup_from_task(current);
31063168
objcg = __get_obj_cgroup_from_memcg(memcg);
31073169
rcu_read_unlock();
31083170
return objcg;
@@ -6440,6 +6502,7 @@ static void mem_cgroup_move_task(void)
64406502
mem_cgroup_clear_mc();
64416503
}
64426504
}
6505+
64436506
#else /* !CONFIG_MMU */
64446507
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
64456508
{
@@ -6453,8 +6516,39 @@ static void mem_cgroup_move_task(void)
64536516
}
64546517
#endif
64556518

6519+
#ifdef CONFIG_MEMCG_KMEM
6520+
static void mem_cgroup_fork(struct task_struct *task)
6521+
{
6522+
/*
6523+
* Set the update flag to cause task->objcg to be initialized lazily
6524+
* on the first allocation. It can be done without any synchronization
6525+
* because it's always performed on the current task, as is
6526+
* current_objcg_update().
6527+
*/
6528+
task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG;
6529+
}
6530+
6531+
static void mem_cgroup_exit(struct task_struct *task)
6532+
{
6533+
struct obj_cgroup *objcg = task->objcg;
6534+
6535+
objcg = (struct obj_cgroup *)
6536+
((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
6537+
if (objcg)
6538+
obj_cgroup_put(objcg);
6539+
6540+
/*
6541+
* Some kernel allocations can happen after this point,
6542+
* but let's ignore them. It can be done without any synchronization
6543+
* because it's always performed on the current task, so does
6544+
* current_objcg_update().
6545+
*/
6546+
task->objcg = NULL;
6547+
}
6548+
#endif
6549+
64566550
#ifdef CONFIG_LRU_GEN
6457-
static void mem_cgroup_attach(struct cgroup_taskset *tset)
6551+
static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
64586552
{
64596553
struct task_struct *task;
64606554
struct cgroup_subsys_state *css;
@@ -6472,10 +6566,31 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
64726566
task_unlock(task);
64736567
}
64746568
#else
6569+
static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
6570+
#endif /* CONFIG_LRU_GEN */
6571+
6572+
#ifdef CONFIG_MEMCG_KMEM
6573+
static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
6574+
{
6575+
struct task_struct *task;
6576+
struct cgroup_subsys_state *css;
6577+
6578+
cgroup_taskset_for_each(task, css, tset) {
6579+
/* atomically set the update bit */
6580+
set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg);
6581+
}
6582+
}
6583+
#else
6584+
static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
6585+
#endif /* CONFIG_MEMCG_KMEM */
6586+
6587+
#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
64756588
static void mem_cgroup_attach(struct cgroup_taskset *tset)
64766589
{
6590+
mem_cgroup_lru_gen_attach(tset);
6591+
mem_cgroup_kmem_attach(tset);
64776592
}
6478-
#endif /* CONFIG_LRU_GEN */
6593+
#endif
64796594

64806595
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
64816596
{
@@ -6885,9 +7000,15 @@ struct cgroup_subsys memory_cgrp_subsys = {
68857000
.css_reset = mem_cgroup_css_reset,
68867001
.css_rstat_flush = mem_cgroup_css_rstat_flush,
68877002
.can_attach = mem_cgroup_can_attach,
7003+
#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
68887004
.attach = mem_cgroup_attach,
7005+
#endif
68897006
.cancel_attach = mem_cgroup_cancel_attach,
68907007
.post_attach = mem_cgroup_move_task,
7008+
#ifdef CONFIG_MEMCG_KMEM
7009+
.fork = mem_cgroup_fork,
7010+
.exit = mem_cgroup_exit,
7011+
#endif
68917012
.dfl_cftypes = memory_files,
68927013
.legacy_cftypes = mem_cgroup_legacy_files,
68937014
.early_init = 0,

0 commit comments

Comments
 (0)