Skip to content

Commit 763802b

Browse files
joergroedel authored and torvalds committed
x86/mm: split vmalloc_sync_all()
Commit 3f8fd02 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in the vunmap() code-path. While this change was necessary to maintain correctness on x86-32-pae kernels, it also adds additional cycles for architectures that don't need it.

Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported severe performance regressions in micro-benchmarks because it now also calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But the vmalloc_sync_all() implementation on x86-64 is only needed for newly created mappings.

To avoid the unnecessary work on x86-64 and to gain the performance back, split up vmalloc_sync_all() into two functions:

  * vmalloc_sync_mappings(), and
  * vmalloc_sync_unmappings()

Most call-sites to vmalloc_sync_all() only care about new mappings being synchronized. The only exception is the new call-site added in the above mentioned commit.

Shile Zhang directed us to a report of an 80% regression in reaim throughput.

Fixes: 3f8fd02 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()")
Reported-by: kernel test robot <[email protected]>
Reported-by: Shile Zhang <[email protected]>
Signed-off-by: Joerg Roedel <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Tested-by: Borislav Petkov <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]> [GHES]
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Link: https://lists.01.org/hyperkitty/list/[email protected]/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 0715e6c commit 763802b

File tree

6 files changed

+43
-13
lines changed

6 files changed

+43
-13
lines changed

arch/x86/mm/fault.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
190190
return pmd_k;
191191
}
192192

193-
void vmalloc_sync_all(void)
193+
static void vmalloc_sync(void)
194194
{
195195
unsigned long address;
196196

@@ -217,6 +217,16 @@ void vmalloc_sync_all(void)
217217
}
218218
}
219219

220+
void vmalloc_sync_mappings(void)
221+
{
222+
vmalloc_sync();
223+
}
224+
225+
void vmalloc_sync_unmappings(void)
226+
{
227+
vmalloc_sync();
228+
}
229+
220230
/*
221231
* 32-bit:
222232
*
@@ -319,11 +329,23 @@ static void dump_pagetable(unsigned long address)
319329

320330
#else /* CONFIG_X86_64: */
321331

322-
void vmalloc_sync_all(void)
332+
void vmalloc_sync_mappings(void)
323333
{
334+
/*
335+
* 64-bit mappings might allocate new p4d/pud pages
336+
* that need to be propagated to all tasks' PGDs.
337+
*/
324338
sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
325339
}
326340

341+
void vmalloc_sync_unmappings(void)
342+
{
343+
/*
344+
* Unmappings never allocate or free p4d/pud pages.
345+
* No work is required here.
346+
*/
347+
}
348+
327349
/*
328350
* 64-bit:
329351
*

drivers/acpi/apei/ghes.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes)
171171
* New allocation must be visible in all pgd before it can be found by
172172
* an NMI allocating from the pool.
173173
*/
174-
vmalloc_sync_all();
174+
vmalloc_sync_mappings();
175175

176176
rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
177177
if (rc)

include/linux/vmalloc.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
141141

142142
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
143143
unsigned long pgoff);
144-
void vmalloc_sync_all(void);
145-
144+
void vmalloc_sync_mappings(void);
145+
void vmalloc_sync_unmappings(void);
146+
146147
/*
147148
* Lowlevel-APIs (not for driver use!)
148149
*/

kernel/notifier.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);
519519

520520
int register_die_notifier(struct notifier_block *nb)
521521
{
522-
vmalloc_sync_all();
522+
vmalloc_sync_mappings();
523523
return atomic_notifier_chain_register(&die_chain, nb);
524524
}
525525
EXPORT_SYMBOL_GPL(register_die_notifier);

mm/nommu.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -370,10 +370,14 @@ void vm_unmap_aliases(void)
370370
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
371371

372372
/*
373-
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
374-
* have one.
373+
* Implement a stub for vmalloc_sync_[un]mappings() if the architecture
374+
* chose not to have one.
375375
*/
376-
void __weak vmalloc_sync_all(void)
376+
void __weak vmalloc_sync_mappings(void)
377+
{
378+
}
379+
380+
void __weak vmalloc_sync_unmappings(void)
377381
{
378382
}
379383

mm/vmalloc.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,7 +1295,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
12951295
* First make sure the mappings are removed from all page-tables
12961296
* before they are freed.
12971297
*/
1298-
vmalloc_sync_all();
1298+
vmalloc_sync_unmappings();
12991299

13001300
/*
13011301
* TODO: to calculate a flush range without looping.
@@ -3128,16 +3128,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
31283128
EXPORT_SYMBOL(remap_vmalloc_range);
31293129

31303130
/*
3131-
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
3132-
* have one.
3131+
* Implement stubs for vmalloc_sync_[un]mappings() if the architecture chose
3132+
* not to have one.
31333133
*
31343134
* The purpose of this function is to make sure the vmalloc area
31353135
* mappings are identical in all page-tables in the system.
31363136
*/
3137-
void __weak vmalloc_sync_all(void)
3137+
void __weak vmalloc_sync_mappings(void)
31383138
{
31393139
}
31403140

3141+
void __weak vmalloc_sync_unmappings(void)
3142+
{
3143+
}
31413144

31423145
static int f(pte_t *pte, unsigned long addr, void *data)
31433146
{

0 commit comments

Comments
 (0)