
Commit e36176b

urezki authored and torvalds committed
mm/vmalloc: rework vmap_area_lock
With the new allocation approach introduced in the 5.2 kernel, it becomes possible to get rid of one global spinlock. By doing that we can further improve the KVA allocator from a performance point of view.

Basically we can have two independent locks, one for the allocation path and another one for deallocation, because they operate on two different entities: the "free" data structures and the "busy" data structures. Allocation and deallocation operations can still interfere with each other when running simultaneously on different CPUs, so some dependency remains, but with two locks it becomes lower.

Summarizing:

- it reduces the high lock contention;
- it allows operations on the "free" and "busy" trees to be performed in parallel on different CPUs.

Please note it does not solve the scalability issue.

Test results:

In order to evaluate this patch, we can run the "vmalloc test driver" to see how many CPU cycles it takes to complete all test cases running sequentially. All online CPUs run it, so it causes high lock contention.

HiKey 960, ARM64, 8xCPUs, big.LITTLE:

<snip>
    sudo ./test_vmalloc.sh sequential_test_order=1
<snip>

<default>
[ 390.950557] All test took CPU0=457126382 cycles
[ 391.046690] All test took CPU1=454763452 cycles
[ 391.128586] All test took CPU2=454539334 cycles
[ 391.222669] All test took CPU3=455649517 cycles
[ 391.313946] All test took CPU4=388272196 cycles
[ 391.410425] All test took CPU5=384036264 cycles
[ 391.492219] All test took CPU6=387432964 cycles
[ 391.578433] All test took CPU7=387201996 cycles
<default>

<patched>
[ 304.721224] All test took CPU0=391521310 cycles
[ 304.821219] All test took CPU1=393533002 cycles
[ 304.917120] All test took CPU2=392243032 cycles
[ 305.008986] All test took CPU3=392353853 cycles
[ 305.108944] All test took CPU4=297630721 cycles
[ 305.196406] All test took CPU5=297548736 cycles
[ 305.288602] All test took CPU6=297092392 cycles
[ 305.381088] All test took CPU7=297293597 cycles
<patched>

The patched variant is ~14%-23% faster.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Uladzislau Rezki (Sony) <[email protected]>
Acked-by: Andrew Morton <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Oleksiy Avramchenko <[email protected]>
Cc: Steven Rostedt <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
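The locking split described above can be summarized in a standalone sketch. The following is a minimal, hypothetical user-space illustration of the pattern the patch applies, not kernel code: the demo_* names, the pthread spinlocks and the toy list-based "trees" are stand-ins for vmap_area_lock/free_vmap_area_lock and the busy/free rb-trees in mm/vmalloc.c.

/*
 * Illustrative sketch only (user-space, pthreads); not kernel code.
 * One lock protects the "free" structures touched by allocation, a
 * second lock protects the "busy" structures touched by freeing,
 * mirroring free_vmap_area_lock and vmap_area_lock in the patch.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_area {                       /* stand-in for struct vmap_area */
        unsigned long start, end;
        struct demo_area *next;
};

static pthread_spinlock_t free_lock;     /* cf. free_vmap_area_lock */
static pthread_spinlock_t busy_lock;     /* cf. vmap_area_lock */
static struct demo_area *busy_list;      /* stand-in for the busy tree/list */
static struct demo_area *free_list;      /* stand-in for the free tree/list */
static unsigned long next_addr = 0x1000; /* toy address cursor, not real KVA */

/* Allocation: carve a range under free_lock, publish it under busy_lock. */
static struct demo_area *demo_alloc(unsigned long size)
{
        struct demo_area *va = malloc(sizeof(*va));

        if (!va)
                return NULL;

        pthread_spin_lock(&free_lock);   /* only "free" data is touched here */
        va->start = next_addr;           /* toy replacement for __alloc_vmap_area() */
        va->end = va->start + size;
        next_addr = va->end;
        pthread_spin_unlock(&free_lock);

        pthread_spin_lock(&busy_lock);   /* now insert into the "busy" data */
        va->next = busy_list;
        busy_list = va;
        pthread_spin_unlock(&busy_lock);

        return va;
}

/* Freeing: unlink from "busy" data, then return the range to "free" data. */
static void demo_free(struct demo_area *va)
{
        struct demo_area **pp;

        pthread_spin_lock(&busy_lock);
        for (pp = &busy_list; *pp; pp = &(*pp)->next) {
                if (*pp == va) {
                        *pp = va->next;
                        break;
                }
        }
        pthread_spin_unlock(&busy_lock);

        pthread_spin_lock(&free_lock);   /* cf. merge_or_add_vmap_area() */
        va->next = free_list;
        free_list = va;
        pthread_spin_unlock(&free_lock);
}

int main(void)
{
        pthread_spin_init(&free_lock, PTHREAD_PROCESS_PRIVATE);
        pthread_spin_init(&busy_lock, PTHREAD_PROCESS_PRIVATE);

        struct demo_area *va = demo_alloc(4096);
        if (!va)
                return 1;
        printf("allocated [%lx-%lx)\n", va->start, va->end);
        demo_free(va);
        return 0;
}

Because an allocation mostly holds the "free" lock and a concurrent free mostly holds the "busy" lock, the two paths contend on the same lock only briefly, which is where the reduced contention in the numbers above comes from.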
1 parent 746dd40 commit e36176b


mm/vmalloc.c

Lines changed: 50 additions & 30 deletions
@@ -331,6 +331,7 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 
 
 static DEFINE_SPINLOCK(vmap_area_lock);
+static DEFINE_SPINLOCK(free_vmap_area_lock);
 /* Export for kexec only */
 LIST_HEAD(vmap_area_list);
 static LLIST_HEAD(vmap_purge_list);
@@ -1114,7 +1115,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
          */
         pva = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
 
-        spin_lock(&vmap_area_lock);
+        spin_lock(&free_vmap_area_lock);
 
         if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva))
                 kmem_cache_free(vmap_area_cachep, pva);
@@ -1124,14 +1125,17 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
          * returned. Therefore trigger the overflow path.
          */
         addr = __alloc_vmap_area(size, align, vstart, vend);
+        spin_unlock(&free_vmap_area_lock);
+
         if (unlikely(addr == vend))
                 goto overflow;
 
         va->va_start = addr;
         va->va_end = addr + size;
         va->vm = NULL;
-        insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
 
+        spin_lock(&vmap_area_lock);
+        insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
         spin_unlock(&vmap_area_lock);
 
         BUG_ON(!IS_ALIGNED(va->va_start, align));
@@ -1141,7 +1145,6 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
         return va;
 
 overflow:
-        spin_unlock(&vmap_area_lock);
         if (!purged) {
                 purge_vmap_area_lazy();
                 purged = 1;
@@ -1177,28 +1180,25 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
 
-static void __free_vmap_area(struct vmap_area *va)
+/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+static void free_vmap_area(struct vmap_area *va)
 {
         /*
          * Remove from the busy tree/list.
          */
+        spin_lock(&vmap_area_lock);
         unlink_va(va, &vmap_area_root);
+        spin_unlock(&vmap_area_lock);
 
         /*
-         * Merge VA with its neighbors, otherwise just add it.
+         * Insert/Merge it back to the free tree/list.
          */
+        spin_lock(&free_vmap_area_lock);
         merge_or_add_vmap_area(va,
                 &free_vmap_area_root, &free_vmap_area_list);
-}
-
-/*
- * Free a region of KVA allocated by alloc_vmap_area
- */
-static void free_vmap_area(struct vmap_area *va)
-{
-        spin_lock(&vmap_area_lock);
-        __free_vmap_area(va);
-        spin_unlock(&vmap_area_lock);
+        spin_unlock(&free_vmap_area_lock);
 }
 
 /*
@@ -1291,7 +1291,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
         flush_tlb_kernel_range(start, end);
         resched_threshold = lazy_max_pages() << 1;
 
-        spin_lock(&vmap_area_lock);
+        spin_lock(&free_vmap_area_lock);
         llist_for_each_entry_safe(va, n_va, valist, purge_list) {
                 unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
 
@@ -1306,9 +1306,9 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
                 atomic_long_sub(nr, &vmap_lazy_nr);
 
                 if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
-                        cond_resched_lock(&vmap_area_lock);
+                        cond_resched_lock(&free_vmap_area_lock);
         }
-        spin_unlock(&vmap_area_lock);
+        spin_unlock(&free_vmap_area_lock);
         return true;
 }
 
@@ -2030,15 +2030,21 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
-static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
-                              unsigned long flags, const void *caller)
+static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
+        struct vmap_area *va, unsigned long flags, const void *caller)
 {
-        spin_lock(&vmap_area_lock);
         vm->flags = flags;
         vm->addr = (void *)va->va_start;
         vm->size = va->va_end - va->va_start;
         vm->caller = caller;
         va->vm = vm;
+}
+
+static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
+                              unsigned long flags, const void *caller)
+{
+        spin_lock(&vmap_area_lock);
+        setup_vmalloc_vm_locked(vm, va, flags, caller);
         spin_unlock(&vmap_area_lock);
 }
 
@@ -3298,7 +3304,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                 goto err_free;
         }
 retry:
-        spin_lock(&vmap_area_lock);
+        spin_lock(&free_vmap_area_lock);
 
         /* start scanning - we scan from the top, begin with the last area */
         area = term_area = last_area;
@@ -3380,29 +3386,38 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                 va = vas[area];
                 va->va_start = start;
                 va->va_end = start + size;
-
-                insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
         }
 
-        spin_unlock(&vmap_area_lock);
+        spin_unlock(&free_vmap_area_lock);
 
         /* insert all vm's */
-        for (area = 0; area < nr_vms; area++)
-                setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
+        spin_lock(&vmap_area_lock);
+        for (area = 0; area < nr_vms; area++) {
+                insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);
+
+                setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
                                  pcpu_get_vm_areas);
+        }
+        spin_unlock(&vmap_area_lock);
 
         kfree(vas);
         return vms;
 
 recovery:
-        /* Remove previously inserted areas. */
+        /*
+         * Remove previously allocated areas. There is no
+         * need in removing these areas from the busy tree,
+         * because they are inserted only on the final step
+         * and when pcpu_get_vm_areas() is success.
+         */
         while (area--) {
-                __free_vmap_area(vas[area]);
+                merge_or_add_vmap_area(vas[area],
+                        &free_vmap_area_root, &free_vmap_area_list);
                 vas[area] = NULL;
         }
 
 overflow:
-        spin_unlock(&vmap_area_lock);
+        spin_unlock(&free_vmap_area_lock);
         if (!purged) {
                 purge_vmap_area_lazy();
                 purged = true;
@@ -3453,9 +3468,12 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
+        __acquires(&vmap_purge_lock)
         __acquires(&vmap_area_lock)
 {
+        mutex_lock(&vmap_purge_lock);
         spin_lock(&vmap_area_lock);
+
         return seq_list_start(&vmap_area_list, *pos);
 }
 
@@ -3465,8 +3483,10 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
 }
 
 static void s_stop(struct seq_file *m, void *p)
+        __releases(&vmap_purge_lock)
         __releases(&vmap_area_lock)
 {
+        mutex_unlock(&vmap_purge_lock);
         spin_unlock(&vmap_area_lock);
 }
 