Skip to content

Commit 39b9552

Browse files
kiryl authored and
Ingo Molnar committed
x86/mm: Optimize boot-time paging mode switching cost
By this point we have functioning boot-time switching between 4- and 5-level paging mode. But the naive approach comes with a cost. Numbers below are for kernel build, allmodconfig, 5 times. CONFIG_X86_5LEVEL=n: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17308719.892691 task-clock:u (msec) # 26.772 CPUs utilized ( +- 0.11% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,993,164 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,614,978,867,455 cycles:u # 2.520 GHz ( +- 0.01% ) 39,371,534,575,126 stalled-cycles-frontend:u # 90.27% frontend cycles idle ( +- 0.09% ) 28,363,350,152,428 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,784,066,413 branches:u # 364.948 M/sec ( +- 0.00% ) 250,808,144,781 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 646.531974142 seconds time elapsed ( +- 1.15% ) CONFIG_X86_5LEVEL=y: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17411536.780625 task-clock:u (msec) # 26.426 CPUs utilized ( +- 0.10% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,868,663 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,865,909,056,301 cycles:u # 2.519 GHz ( +- 0.01% ) 39,740,130,365,581 stalled-cycles-frontend:u # 90.59% frontend cycles idle ( +- 0.05% ) 28,363,358,997,959 instructions:u # 0.65 insn per cycle # 1.40 stalled cycles per insn ( +- 0.00% ) 6,316,784,937,460 branches:u # 362.793 M/sec ( +- 0.00% ) 251,531,919,485 branch-misses:u # 3.98% of all branches ( +- 0.00% ) 658.886307752 seconds time elapsed ( +- 0.92% ) The patch tries to fix the performance regression by using cpu_feature_enabled(X86_FEATURE_LA57) instead of pgtable_l5_enabled in all hot code paths. These will statically patch the target code for additional performance. 
CONFIG_X86_5LEVEL=y + the patch: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17381990.268506 task-clock:u (msec) # 26.907 CPUs utilized ( +- 0.19% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,862,625 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,697,726,320,051 cycles:u # 2.514 GHz ( +- 0.03% ) 39,480,408,690,401 stalled-cycles-frontend:u # 90.35% frontend cycles idle ( +- 0.05% ) 28,363,394,221,388 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,794,985,573 branches:u # 363.410 M/sec ( +- 0.00% ) 251,013,232,547 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 645.991174661 seconds time elapsed ( +- 1.19% ) Unfortunately, this approach doesn't help with text size: vmlinux.before .text size: 8190319 vmlinux.after .text size: 8200623 The .text section is increased by about 4k. Not sure if we can do anything about this. Signed-off-by: Kirill A. Shutemov <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Arjan van de Ven <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Dan Williams <[email protected]> Cc: Dave Hansen <[email protected]> Cc: David Woodhouse <[email protected]> Cc: Josh Poimboeuf <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: [email protected] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent 92e1c5b commit 39b9552

File tree

6 files changed

+25
-13
lines changed

6 files changed

+25
-13
lines changed

arch/x86/boot/compressed/misc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
#undef CONFIG_PARAVIRT_SPINLOCKS
1313
#undef CONFIG_KASAN
1414

15+
#ifdef CONFIG_X86_5LEVEL
16+
/* cpu_feature_enabled() cannot be used that early */
17+
#define pgtable_l5_enabled __pgtable_l5_enabled
18+
#endif
19+
1520
#include <linux/linkage.h>
1621
#include <linux/screen_info.h>
1722
#include <linux/elf.h>

arch/x86/entry/entry_64.S

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -275,15 +275,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
275275
* depending on paging mode) in the address.
276276
*/
277277
#ifdef CONFIG_X86_5LEVEL
278-
testl $1, pgtable_l5_enabled(%rip)
279-
jz 1f
280-
shl $(64 - 57), %rcx
281-
sar $(64 - 57), %rcx
282-
jmp 2f
283-
1:
284-
shl $(64 - 48), %rcx
285-
sar $(64 - 48), %rcx
286-
2:
278+
ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
279+
"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
287280
#else
288281
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
289282
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx

arch/x86/include/asm/pgtable_64_types.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ typedef unsigned long pgprotval_t;
2121
typedef struct { pteval_t pte; } pte_t;
2222

2323
#ifdef CONFIG_X86_5LEVEL
24-
extern unsigned int pgtable_l5_enabled;
24+
extern unsigned int __pgtable_l5_enabled;
25+
#ifndef pgtable_l5_enabled
26+
#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
27+
#endif
2528
#else
2629
#define pgtable_l5_enabled 0
2730
#endif

arch/x86/kernel/head64.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
#include <asm/microcode.h>
3333
#include <asm/kasan.h>
3434

35+
#ifdef CONFIG_X86_5LEVEL
36+
#undef pgtable_l5_enabled
37+
#define pgtable_l5_enabled __pgtable_l5_enabled
38+
#endif
39+
3540
/*
3641
* Manage page tables very early on.
3742
*/
@@ -40,8 +45,8 @@ static unsigned int __initdata next_early_pgt;
4045
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
4146

4247
#ifdef CONFIG_X86_5LEVEL
43-
unsigned int pgtable_l5_enabled __ro_after_init;
44-
EXPORT_SYMBOL(pgtable_l5_enabled);
48+
unsigned int __pgtable_l5_enabled __ro_after_init;
49+
EXPORT_SYMBOL(__pgtable_l5_enabled);
4550
unsigned int pgdir_shift __ro_after_init = 39;
4651
EXPORT_SYMBOL(pgdir_shift);
4752
unsigned int ptrs_per_p4d __ro_after_init = 1;

arch/x86/kernel/head_64.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ ENTRY(secondary_startup_64)
124124
/* Enable PAE mode, PGE and LA57 */
125125
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
126126
#ifdef CONFIG_X86_5LEVEL
127-
testl $1, pgtable_l5_enabled(%rip)
127+
testl $1, __pgtable_l5_enabled(%rip)
128128
jz 1f
129129
orl $X86_CR4_LA57, %ecx
130130
1:

arch/x86/mm/kasan_init_64.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
// SPDX-License-Identifier: GPL-2.0
22
#define DISABLE_BRANCH_PROFILING
33
#define pr_fmt(fmt) "kasan: " fmt
4+
5+
#ifdef CONFIG_X86_5LEVEL
6+
/* Too early to use cpu_feature_enabled() */
7+
#define pgtable_l5_enabled __pgtable_l5_enabled
8+
#endif
9+
410
#include <linux/bootmem.h>
511
#include <linux/kasan.h>
612
#include <linux/kdebug.h>

0 commit comments

Comments
 (0)