Commit 56f3547

ftang1 authored and torvalds committed
mm: adjust vm_committed_as_batch according to vm overcommit policy
When checking a performance change for the will-it-scale scalability mmap test [1], we found very high lock contention on the spinlock of the percpu counter 'vm_committed_as':

    94.14%  0.35%  [kernel.kallsyms]  [k] _raw_spin_lock_irqsave
      48.21% _raw_spin_lock_irqsave;percpu_counter_add_batch;__vm_enough_memory;mmap_region;do_mmap;
      45.91% _raw_spin_lock_irqsave;percpu_counter_add_batch;__do_munmap;

This heavy lock contention is not always necessary: 'vm_committed_as' only needs to be very precise when the strict OVERCOMMIT_NEVER policy is set, which requires a rather small batch number for the percpu counter. So keep the batch number unchanged for the strict OVERCOMMIT_NEVER policy, and lift it to 64X for the OVERCOMMIT_ALWAYS and OVERCOMMIT_GUESS policies. Also add a sysctl handler to adjust it when the policy is reconfigured.

A benchmark with the same testcase in [1] shows a 53% improvement on an 8C/16T desktop and a 2097% (20X) improvement on a 4S/72C/144T server. We tested on the 0day test platforms (server, desktop and laptop), and 80%+ of them show improvements with that test; whether a platform improves depends on whether the test's mmap size is bigger than the computed batch number. With a 16X lift instead, only 1/3 of the platforms show improvements, though it should still help mmap/munmap usage generally, as Michal Hocko mentioned:

: I believe that there are non-synthetic workloads which would benefit
: from a larger batch.  E.g. large in memory databases which do large
: mmaps during startups from multiple threads.

[1] https://lore.kernel.org/lkml/20200305062138.GI5972@shao2-debian/

Signed-off-by: Feng Tang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Qian Cai <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Dennis Zhou <[email protected]>
Cc: Haiyang Zhang <[email protected]>
Cc: kernel test robot <[email protected]>
Cc: "K. Y. Srinivasan" <[email protected]>
Cc: Tejun Heo <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
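The contention mechanism is worth spelling out. A percpu counter keeps a per-CPU delta and only takes the shared spinlock to fold that delta into the global count once it crosses the batch threshold; a larger batch means fewer lock acquisitions at the cost of a fuzzier global value. Below is a minimal userspace sketch of that idea (illustrative only; the mutex, the names and the batch value are assumptions, not the kernel implementation):

    #include <pthread.h>
    #include <stdio.h>

    static long global_count;                        /* cf. vm_committed_as */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    struct local_counter { long v; };                /* one per CPU/thread */

    static void counter_add_batch(struct local_counter *lc, long delta, long batch)
    {
            lc->v += delta;
            /* only touch the shared lock when the local delta crosses +/-batch */
            if (lc->v >= batch || lc->v <= -batch) {
                    pthread_mutex_lock(&lock);
                    global_count += lc->v;
                    pthread_mutex_unlock(&lock);
                    lc->v = 0;
            }
    }

    int main(void)
    {
            struct local_counter lc = { 0 };

            /* batch=64: 1000 adds take the lock 15 times instead of 1000 */
            for (long i = 0; i < 1000; i++)
                    counter_add_batch(&lc, 1, 64);

            printf("global=%ld unflushed=%ld\n", global_count, lc.v);
            return 0;
    }

With the old sizing, a large multi-threaded mmap workload crosses the threshold constantly, which is exactly the _raw_spin_lock_irqsave hotspot in the profile above.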
1 parent 0a4954a commit 56f3547

File tree: 5 files changed (+64, -7 lines)

include/linux/mm.h

Lines changed: 2 additions & 0 deletions

@@ -206,6 +206,8 @@ int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
 int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
+int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
include/linux/mman.h

Lines changed: 4 additions & 0 deletions

@@ -57,8 +57,12 @@ extern struct percpu_counter vm_committed_as;
 
 #ifdef CONFIG_SMP
 extern s32 vm_committed_as_batch;
+extern void mm_compute_batch(int overcommit_policy);
 #else
 #define vm_committed_as_batch 0
+static inline void mm_compute_batch(int overcommit_policy)
+{
+}
 #endif
 
 unsigned long vm_memory_committed(void);

kernel/sysctl.c

Lines changed: 1 addition & 1 deletion

@@ -2671,7 +2671,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &sysctl_overcommit_memory,
 		.maxlen		= sizeof(sysctl_overcommit_memory),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= overcommit_policy_handler,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
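With this change, a policy switch from userspace is routed through overcommit_policy_handler instead of plain proc_dointvec_minmax. A small illustrative snippet for flipping the policy through the standard procfs file (needs root; the values 0/1/2 map to OVERCOMMIT_GUESS/OVERCOMMIT_ALWAYS/OVERCOMMIT_NEVER):

    #include <stdio.h>

    int main(void)
    {
            /* equivalent to: sysctl -w vm.overcommit_memory=2 */
            FILE *f = fopen("/proc/sys/vm/overcommit_memory", "w");

            if (!f) {
                    perror("fopen");        /* typically needs root */
                    return 1;
            }
            fputs("2\n", f);                /* 2 = OVERCOMMIT_NEVER */
            fclose(f);
            return 0;
    }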

mm/mm_init.c

Lines changed: 16 additions & 6 deletions

@@ -13,6 +13,7 @@
 #include <linux/memory.h>
 #include <linux/notifier.h>
 #include <linux/sched.h>
+#include <linux/mman.h>
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -144,14 +145,23 @@ EXPORT_SYMBOL_GPL(mm_kobj);
 #ifdef CONFIG_SMP
 s32 vm_committed_as_batch = 32;
 
-static void __meminit mm_compute_batch(void)
+void mm_compute_batch(int overcommit_policy)
 {
 	u64 memsized_batch;
 	s32 nr = num_present_cpus();
 	s32 batch = max_t(s32, nr*2, 32);
-
-	/* batch size set to 0.4% of (total memory/#cpus), or max int32 */
-	memsized_batch = min_t(u64, (totalram_pages()/nr)/256, 0x7fffffff);
+	unsigned long ram_pages = totalram_pages();
+
+	/*
+	 * For policy OVERCOMMIT_NEVER, set batch size to 0.4% of
+	 * (total memory/#cpus), and lift it to 25% for other policies
+	 * to ease the possible lock contention for percpu_counter
+	 * vm_committed_as, while the max limit is INT_MAX
+	 */
+	if (overcommit_policy == OVERCOMMIT_NEVER)
+		memsized_batch = min_t(u64, ram_pages/nr/256, INT_MAX);
+	else
+		memsized_batch = min_t(u64, ram_pages/nr/4, INT_MAX);
 
 	vm_committed_as_batch = max_t(s32, memsized_batch, batch);
 }
@@ -162,7 +172,7 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
 	switch (action) {
 	case MEM_ONLINE:
 	case MEM_OFFLINE:
-		mm_compute_batch();
+		mm_compute_batch(sysctl_overcommit_memory);
 	default:
 		break;
 	}
@@ -176,7 +186,7 @@ static struct notifier_block compute_batch_nb __meminitdata = {
 
 static int __init mm_compute_batch_init(void)
 {
-	mm_compute_batch();
+	mm_compute_batch(sysctl_overcommit_memory);
 	register_hotmemory_notifier(&compute_batch_nb);
 
 	return 0;
mm/util.c

Lines changed: 41 additions & 0 deletions

@@ -746,6 +746,47 @@ int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer,
 	return ret;
 }
 
+static void sync_overcommit_as(struct work_struct *dummy)
+{
+	percpu_counter_sync(&vm_committed_as);
+}
+
+int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int new_policy;
+	int ret;
+
+	/*
+	 * The deviation of sync_overcommit_as could be big with loose policy
+	 * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to
+	 * strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply
+	 * with the strict "NEVER", and to avoid possible race conditions
+	 * (even though users usually won't switch to OVERCOMMIT_NEVER too
+	 * frequently), the switch is done in the following order:
+	 *	1. changing the batch
+	 *	2. sync percpu count on each CPU
+	 *	3. switch the policy
+	 */
+	if (write) {
+		t = *table;
+		t.data = &new_policy;
+		ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+		if (ret)
+			return ret;
+
+		mm_compute_batch(new_policy);
+		if (new_policy == OVERCOMMIT_NEVER)
+			schedule_on_each_cpu(sync_overcommit_as);
+		sysctl_overcommit_memory = new_policy;
+	} else {
+		ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	}
+
+	return ret;
+}
+
 int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos)
 {
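The ordering in the comment is the crux: under a loose policy each CPU may hold an unflushed delta of up to the batch size, so before OVERCOMMIT_NEVER starts trusting the global count those deltas must be folded in. A userspace analogue of the sync step, with hypothetical numbers (illustrative; percpu_counter_sync itself is kernel-internal):

    #include <stdio.h>

    #define NR_CPUS 4

    static long global_count = 100;                         /* globally visible total */
    static long local_delta[NR_CPUS] = { 60, -10, 30, 5 };  /* unflushed per-CPU deltas */

    /* fold every per-CPU delta into the global count, cf. step 2 above */
    static void sync_counter(void)
    {
            for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                    global_count += local_delta[cpu];
                    local_delta[cpu] = 0;
            }
    }

    int main(void)
    {
            printf("before sync: global=%ld (true total: 185)\n", global_count);
            sync_counter();         /* must precede the switch to the strict policy */
            printf("after sync:  global=%ld\n", global_count);
            return 0;
    }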
