Skip to content

Commit f7e1cb6

Browse files
hnaz authored and torvalds committed
mm: memcontrol: account socket memory in unified hierarchy memory controller
Socket memory can be a significant share of overall memory consumed by common workloads. In order to provide reasonable resource isolation in the unified hierarchy, this type of memory needs to be included in the tracking/accounting of a cgroup under active memory resource control.

Overhead is only incurred when a non-root control group is created AND the memory controller is instructed to track and account the memory footprint of that group.

cgroup.memory=nosocket can be specified on the boot command line to override any runtime configuration and forcibly exclude socket memory from active memory resource control.

Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: David S. Miller <[email protected]>
Reviewed-by: Vladimir Davydov <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 1109208 commit f7e1cb6

File tree

3 files changed

+110
-25
lines changed

3 files changed

+110
-25
lines changed

Documentation/kernel-parameters.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -608,6 +608,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
608608
cut the overhead, others just disable the usage. So
609609
only cgroup_disable=memory is actually worthy}
610610

611+
cgroup.memory= [KNL] Pass options to the cgroup memory controller.
612+
Format: <string>
613+
nosocket -- Disable socket memory accounting.
614+
611615
checkreqprot [SELINUX] Set initial checkreqprot flag value.
612616
Format: { "0" | "1" }
613617
See security/selinux/Kconfig help text.

include/linux/memcontrol.h

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,9 @@ struct mem_cgroup {
170170
unsigned long low;
171171
unsigned long high;
172172

173+
/* Range enforcement for interrupt charges */
174+
struct work_struct high_work;
175+
173176
unsigned long soft_limit;
174177

175178
/* vmpressure notifications */
@@ -680,12 +683,16 @@ void sock_update_memcg(struct sock *sk);
680683
void sock_release_memcg(struct sock *sk);
681684
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
682685
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
683-
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
686+
#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
684687
extern struct static_key memcg_sockets_enabled_key;
685688
#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
686689
/*
 * Report whether @memcg is currently flagged as being under socket memory
 * pressure.
 *
 * With CONFIG_MEMCG_KMEM the answer is the group's tcp_mem.memory_pressure
 * flag; without it no flag is consulted and the result is always false.
 */
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
687690
{
691+
#ifdef CONFIG_MEMCG_KMEM
688692
return memcg->tcp_mem.memory_pressure;
693+
#else
694+
return false;
695+
#endif
689696
}
690697
#else
691698
#define mem_cgroup_sockets_enabled 0

mm/memcontrol.c

Lines changed: 98 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
8080

8181
#define MEM_CGROUP_RECLAIM_RETRIES 5
8282

83+
/* Socket memory accounting disabled? */
84+
static bool cgroup_memory_nosocket;
85+
8386
/* Whether the swap controller is active */
8487
#ifdef CONFIG_MEMCG_SWAP
8588
int do_swap_account __read_mostly;
@@ -1945,27 +1948,40 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
19451948
return NOTIFY_OK;
19461949
}
19471950

1951+
/*
 * Walk from @memcg up through its ancestors (via parent_mem_cgroup()) and,
 * for every group whose memory page counter exceeds its ->high value, record
 * a MEMCG_HIGH event and ask try_to_free_mem_cgroup_pages() to reclaim
 * @nr_pages from that group using @gfp_mask.
 *
 * @memcg is passed by value, so advancing it inside the loop does not affect
 * the caller's pointer.
 */
static void reclaim_high(struct mem_cgroup *memcg,
1952+
unsigned int nr_pages,
1953+
gfp_t gfp_mask)
1954+
{
1955+
do {
1956+
if (page_counter_read(&memcg->memory) <= memcg->high)
1957+
continue; /* not over ->high: jump to the loop condition, i.e. the parent */
1958+
mem_cgroup_events(memcg, MEMCG_HIGH, 1);
1959+
try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
1960+
} while ((memcg = parent_mem_cgroup(memcg))); /* stops once there is no parent */
1961+
}
1962+
1963+
/*
 * Work item handler for mem_cgroup::high_work.
 *
 * Recovers the owning mem_cgroup from the embedded work_struct and runs
 * reclaim_high() on it for CHARGE_BATCH pages with GFP_KERNEL. The work is
 * queued by try_charge() when the high limit is exceeded while
 * in_interrupt() is true.
 */
static void high_work_func(struct work_struct *work)
1964+
{
1965+
struct mem_cgroup *memcg;
1966+
1967+
memcg = container_of(work, struct mem_cgroup, high_work);
1968+
reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
1969+
}
1970+
19481971
/*
19491972
* Scheduled by try_charge() to be executed from the userland return path
19501973
* and reclaims memory over the high limit.
19511974
*/
19521975
void mem_cgroup_handle_over_high(void)
19531976
{
19541977
unsigned int nr_pages = current->memcg_nr_pages_over_high;
1955-
struct mem_cgroup *memcg, *pos;
1978+
struct mem_cgroup *memcg;
19561979

19571980
if (likely(!nr_pages))
19581981
return;
19591982

1960-
pos = memcg = get_mem_cgroup_from_mm(current->mm);
1961-
1962-
do {
1963-
if (page_counter_read(&pos->memory) <= pos->high)
1964-
continue;
1965-
mem_cgroup_events(pos, MEMCG_HIGH, 1);
1966-
try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
1967-
} while ((pos = parent_mem_cgroup(pos)));
1968-
1983+
memcg = get_mem_cgroup_from_mm(current->mm);
1984+
reclaim_high(memcg, nr_pages, GFP_KERNEL);
19691985
css_put(&memcg->css);
19701986
current->memcg_nr_pages_over_high = 0;
19711987
}
@@ -2100,6 +2116,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
21002116
*/
21012117
do {
21022118
if (page_counter_read(&memcg->memory) > memcg->high) {
2119+
/* Don't bother a random interrupted task */
2120+
if (in_interrupt()) {
2121+
schedule_work(&memcg->high_work);
2122+
break;
2123+
}
21032124
current->memcg_nr_pages_over_high += batch;
21042125
set_notify_resume(current);
21052126
break;
@@ -4150,6 +4171,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
41504171
{
41514172
int node;
41524173

4174+
cancel_work_sync(&memcg->high_work);
4175+
41534176
mem_cgroup_remove_from_trees(memcg);
41544177

41554178
for_each_node(node)
@@ -4196,6 +4219,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
41964219
page_counter_init(&memcg->kmem, NULL);
41974220
}
41984221

4222+
INIT_WORK(&memcg->high_work, high_work_func);
41994223
memcg->last_scanned_node = MAX_NUMNODES;
42004224
INIT_LIST_HEAD(&memcg->oom_notify);
42014225
memcg->move_charge_at_immigrate = 0;
@@ -4267,6 +4291,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
42674291
if (ret)
42684292
return ret;
42694293

4294+
#ifdef CONFIG_INET
4295+
if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
4296+
static_key_slow_inc(&memcg_sockets_enabled_key);
4297+
#endif
4298+
42704299
/*
42714300
* Make sure the memcg is initialized: mem_cgroup_iter()
42724301
* orders reading memcg->initialized against its callers
@@ -4313,6 +4342,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
43134342
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
43144343

43154344
memcg_destroy_kmem(memcg);
4345+
#ifdef CONFIG_INET
4346+
if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
4347+
static_key_slow_dec(&memcg_sockets_enabled_key);
4348+
#endif
43164349
__mem_cgroup_free(memcg);
43174350
}
43184351

@@ -5533,8 +5566,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
55335566
commit_charge(newpage, memcg, true);
55345567
}
55355568

5536-
/* Writing them here to avoid exposing memcg's inner layout */
5537-
#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
5569+
#ifdef CONFIG_INET
55385570

55395571
struct static_key memcg_sockets_enabled_key;
55405572
EXPORT_SYMBOL(memcg_sockets_enabled_key);
@@ -5559,10 +5591,15 @@ void sock_update_memcg(struct sock *sk)
55595591

55605592
rcu_read_lock();
55615593
memcg = mem_cgroup_from_task(current);
5562-
if (memcg != root_mem_cgroup &&
5563-
memcg->tcp_mem.active &&
5564-
css_tryget_online(&memcg->css))
5594+
if (memcg == root_mem_cgroup)
5595+
goto out;
5596+
#ifdef CONFIG_MEMCG_KMEM
5597+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active)
5598+
goto out;
5599+
#endif
5600+
if (css_tryget_online(&memcg->css))
55655601
sk->sk_memcg = memcg;
5602+
out:
55665603
rcu_read_unlock();
55675604
}
55685605
EXPORT_SYMBOL(sock_update_memcg);
@@ -5583,15 +5620,30 @@ void sock_release_memcg(struct sock *sk)
55835620
*/
55845621
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
55855622
{
5586-
struct page_counter *counter;
5623+
gfp_t gfp_mask = GFP_KERNEL;
55875624

5588-
if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
5589-
nr_pages, &counter)) {
5590-
memcg->tcp_mem.memory_pressure = 0;
5591-
return true;
5625+
#ifdef CONFIG_MEMCG_KMEM
5626+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
5627+
struct page_counter *counter;
5628+
5629+
if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
5630+
nr_pages, &counter)) {
5631+
memcg->tcp_mem.memory_pressure = 0;
5632+
return true;
5633+
}
5634+
page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
5635+
memcg->tcp_mem.memory_pressure = 1;
5636+
return false;
55925637
}
5593-
page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
5594-
memcg->tcp_mem.memory_pressure = 1;
5638+
#endif
5639+
/* Don't block in the packet receive path */
5640+
if (in_softirq())
5641+
gfp_mask = GFP_NOWAIT;
5642+
5643+
if (try_charge(memcg, gfp_mask, nr_pages) == 0)
5644+
return true;
5645+
5646+
try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
55955647
return false;
55965648
}
55975649

@@ -5602,10 +5654,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
56025654
*/
56035655
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
56045656
{
5605-
page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
5657+
#ifdef CONFIG_MEMCG_KMEM
5658+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
5659+
page_counter_uncharge(&memcg->tcp_mem.memory_allocated,
5660+
nr_pages);
5661+
return;
5662+
}
5663+
#endif
5664+
page_counter_uncharge(&memcg->memory, nr_pages);
5665+
css_put_many(&memcg->css, nr_pages);
56065666
}
56075667

5608-
#endif
5668+
#endif /* CONFIG_INET */
5669+
5670+
/*
 * Boot-time parser for the "cgroup.memory=" command line option (registered
 * with __setup() right after this function).
 *
 * @s is split in place on ',' with strsep(). Empty tokens are skipped, the
 * token "nosocket" sets cgroup_memory_nosocket, and any other token is
 * silently ignored. Always returns 0.
 *
 * NOTE(review): __setup() handlers conventionally return 1 to mark the
 * option as handled; returning 0 may cause the option to be treated as
 * unrecognized and handed on to init. Confirm against the __setup()
 * contract before reusing this pattern.
 */
static int __init cgroup_memory(char *s)
5671+
{
5672+
char *token;
5673+
5674+
while ((token = strsep(&s, ",")) != NULL) {
5675+
if (!*token)
5676+
continue; /* empty token, e.g. from ",," or a trailing comma */
5677+
if (!strcmp(token, "nosocket"))
5678+
cgroup_memory_nosocket = true;
5679+
}
5680+
return 0;
5681+
}
5682+
__setup("cgroup.memory=", cgroup_memory);
56095683

56105684
/*
56115685
* subsys_initcall() for memory controller.

0 commit comments

Comments
 (0)