Skip to content

Commit f5deb79

Browse files
yhuang-intel authored and H. Peter Anvin committed
x86: kexec: Use one page table in x86_64 machine_kexec
Impact: reduce kernel BSS size by 7 pages, improve code readability

Two page tables are used in the current x86_64 kexec implementation. One is used to jump from the kernel virtual address to the identity-mapped address, the other is used to map all physical memory. In fact, on x86_64 there is no conflict between the kernel virtual address space and the physical memory space, so just one page table is sufficient.

The page table pages used to map the control page are dynamically allocated, to save memory if no kexec image is loaded. The ASM code used to map the control page is replaced by C code too.

Signed-off-by: Huang Ying <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
1 parent c415b3d commit f5deb79

File tree

3 files changed

+67
-167
lines changed

3 files changed

+67
-167
lines changed

arch/x86/include/asm/kexec.h

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,8 @@
99
# define PAGES_NR 4
1010
#else
1111
# define PA_CONTROL_PAGE 0
12-
# define VA_CONTROL_PAGE 1
13-
# define PA_PGD 2
14-
# define VA_PGD 3
15-
# define PA_PUD_0 4
16-
# define VA_PUD_0 5
17-
# define PA_PMD_0 6
18-
# define VA_PMD_0 7
19-
# define PA_PTE_0 8
20-
# define VA_PTE_0 9
21-
# define PA_PUD_1 10
22-
# define VA_PUD_1 11
23-
# define PA_PMD_1 12
24-
# define VA_PMD_1 13
25-
# define PA_PTE_1 14
26-
# define VA_PTE_1 15
27-
# define PA_TABLE_PAGE 16
28-
# define PAGES_NR 17
12+
# define PA_TABLE_PAGE 1
13+
# define PAGES_NR 2
2914
#endif
3015

3116
#ifdef CONFIG_X86_32
@@ -157,9 +142,9 @@ relocate_kernel(unsigned long indirection_page,
157142
unsigned long start_address) ATTRIB_NORET;
158143
#endif
159144

160-
#ifdef CONFIG_X86_32
161145
#define ARCH_HAS_KIMAGE_ARCH
162146

147+
#ifdef CONFIG_X86_32
163148
struct kimage_arch {
164149
pgd_t *pgd;
165150
#ifdef CONFIG_X86_PAE
@@ -169,6 +154,12 @@ struct kimage_arch {
169154
pte_t *pte0;
170155
pte_t *pte1;
171156
};
157+
#else
158+
struct kimage_arch {
159+
pud_t *pud;
160+
pmd_t *pmd;
161+
pte_t *pte;
162+
};
172163
#endif
173164

174165
#endif /* __ASSEMBLY__ */

arch/x86/kernel/machine_kexec_64.c

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,6 @@
1818
#include <asm/mmu_context.h>
1919
#include <asm/io.h>
2020

21-
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
22-
static u64 kexec_pgd[512] PAGE_ALIGNED;
23-
static u64 kexec_pud0[512] PAGE_ALIGNED;
24-
static u64 kexec_pmd0[512] PAGE_ALIGNED;
25-
static u64 kexec_pte0[512] PAGE_ALIGNED;
26-
static u64 kexec_pud1[512] PAGE_ALIGNED;
27-
static u64 kexec_pmd1[512] PAGE_ALIGNED;
28-
static u64 kexec_pte1[512] PAGE_ALIGNED;
29-
3021
static void init_level2_page(pmd_t *level2p, unsigned long addr)
3122
{
3223
unsigned long end_addr;
@@ -107,12 +98,65 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
10798
return result;
10899
}
109100

101+
static void free_transition_pgtable(struct kimage *image)
102+
{
103+
free_page((unsigned long)image->arch.pud);
104+
free_page((unsigned long)image->arch.pmd);
105+
free_page((unsigned long)image->arch.pte);
106+
}
107+
108+
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
109+
{
110+
pud_t *pud;
111+
pmd_t *pmd;
112+
pte_t *pte;
113+
unsigned long vaddr, paddr;
114+
int result = -ENOMEM;
115+
116+
vaddr = (unsigned long)relocate_kernel;
117+
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
118+
pgd += pgd_index(vaddr);
119+
if (!pgd_present(*pgd)) {
120+
pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
121+
if (!pud)
122+
goto err;
123+
image->arch.pud = pud;
124+
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
125+
}
126+
pud = pud_offset(pgd, vaddr);
127+
if (!pud_present(*pud)) {
128+
pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
129+
if (!pmd)
130+
goto err;
131+
image->arch.pmd = pmd;
132+
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
133+
}
134+
pmd = pmd_offset(pud, vaddr);
135+
if (!pmd_present(*pmd)) {
136+
pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
137+
if (!pte)
138+
goto err;
139+
image->arch.pte = pte;
140+
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
141+
}
142+
pte = pte_offset_kernel(pmd, vaddr);
143+
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
144+
return 0;
145+
err:
146+
free_transition_pgtable(image);
147+
return result;
148+
}
149+
110150

111151
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
112152
{
113153
pgd_t *level4p;
154+
int result;
114155
level4p = (pgd_t *)__va(start_pgtable);
115-
return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
156+
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
157+
if (result)
158+
return result;
159+
return init_transition_pgtable(image, level4p);
116160
}
117161

118162
static void set_idt(void *newidt, u16 limit)
@@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image)
174218

175219
void machine_kexec_cleanup(struct kimage *image)
176220
{
177-
return;
221+
free_transition_pgtable(image);
178222
}
179223

180224
/*
@@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image)
195239
memcpy(control_page, relocate_kernel, PAGE_SIZE);
196240

197241
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
198-
page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
199-
page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
200-
page_list[VA_PGD] = (unsigned long)kexec_pgd;
201-
page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
202-
page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
203-
page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
204-
page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
205-
page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
206-
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
207-
page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
208-
page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
209-
page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
210-
page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
211-
page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
212-
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
213-
214242
page_list[PA_TABLE_PAGE] =
215243
(unsigned long)__pa(page_address(image->control_code_page));
216244

arch/x86/kernel/relocate_kernel_64.S

Lines changed: 3 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -29,122 +29,6 @@ relocate_kernel:
2929
* %rdx start address
3030
*/
3131

32-
/* map the control page at its virtual address */
33-
34-
movq $0x0000ff8000000000, %r10 /* mask */
35-
mov $(39 - 3), %cl /* bits to shift */
36-
movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
37-
38-
movq %r11, %r9
39-
andq %r10, %r9
40-
shrq %cl, %r9
41-
42-
movq PTR(VA_PGD)(%rsi), %r8
43-
addq %r8, %r9
44-
movq PTR(PA_PUD_0)(%rsi), %r8
45-
orq $PAGE_ATTR, %r8
46-
movq %r8, (%r9)
47-
48-
shrq $9, %r10
49-
sub $9, %cl
50-
51-
movq %r11, %r9
52-
andq %r10, %r9
53-
shrq %cl, %r9
54-
55-
movq PTR(VA_PUD_0)(%rsi), %r8
56-
addq %r8, %r9
57-
movq PTR(PA_PMD_0)(%rsi), %r8
58-
orq $PAGE_ATTR, %r8
59-
movq %r8, (%r9)
60-
61-
shrq $9, %r10
62-
sub $9, %cl
63-
64-
movq %r11, %r9
65-
andq %r10, %r9
66-
shrq %cl, %r9
67-
68-
movq PTR(VA_PMD_0)(%rsi), %r8
69-
addq %r8, %r9
70-
movq PTR(PA_PTE_0)(%rsi), %r8
71-
orq $PAGE_ATTR, %r8
72-
movq %r8, (%r9)
73-
74-
shrq $9, %r10
75-
sub $9, %cl
76-
77-
movq %r11, %r9
78-
andq %r10, %r9
79-
shrq %cl, %r9
80-
81-
movq PTR(VA_PTE_0)(%rsi), %r8
82-
addq %r8, %r9
83-
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
84-
orq $PAGE_ATTR, %r8
85-
movq %r8, (%r9)
86-
87-
/* identity map the control page at its physical address */
88-
89-
movq $0x0000ff8000000000, %r10 /* mask */
90-
mov $(39 - 3), %cl /* bits to shift */
91-
movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
92-
93-
movq %r11, %r9
94-
andq %r10, %r9
95-
shrq %cl, %r9
96-
97-
movq PTR(VA_PGD)(%rsi), %r8
98-
addq %r8, %r9
99-
movq PTR(PA_PUD_1)(%rsi), %r8
100-
orq $PAGE_ATTR, %r8
101-
movq %r8, (%r9)
102-
103-
shrq $9, %r10
104-
sub $9, %cl
105-
106-
movq %r11, %r9
107-
andq %r10, %r9
108-
shrq %cl, %r9
109-
110-
movq PTR(VA_PUD_1)(%rsi), %r8
111-
addq %r8, %r9
112-
movq PTR(PA_PMD_1)(%rsi), %r8
113-
orq $PAGE_ATTR, %r8
114-
movq %r8, (%r9)
115-
116-
shrq $9, %r10
117-
sub $9, %cl
118-
119-
movq %r11, %r9
120-
andq %r10, %r9
121-
shrq %cl, %r9
122-
123-
movq PTR(VA_PMD_1)(%rsi), %r8
124-
addq %r8, %r9
125-
movq PTR(PA_PTE_1)(%rsi), %r8
126-
orq $PAGE_ATTR, %r8
127-
movq %r8, (%r9)
128-
129-
shrq $9, %r10
130-
sub $9, %cl
131-
132-
movq %r11, %r9
133-
andq %r10, %r9
134-
shrq %cl, %r9
135-
136-
movq PTR(VA_PTE_1)(%rsi), %r8
137-
addq %r8, %r9
138-
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
139-
orq $PAGE_ATTR, %r8
140-
movq %r8, (%r9)
141-
142-
relocate_new_kernel:
143-
/* %rdi indirection_page
144-
* %rsi page_list
145-
* %rdx start address
146-
*/
147-
14832
/* zero out flags, and disable interrupts */
14933
pushq $0
15034
popfq
@@ -156,9 +40,8 @@ relocate_new_kernel:
15640
/* get physical address of page table now too */
15741
movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
15842

159-
/* switch to new set of page tables */
160-
movq PTR(PA_PGD)(%rsi), %r9
161-
movq %r9, %cr3
43+
/* Switch to the identity mapped page tables */
44+
movq %rcx, %cr3
16245

16346
/* setup a new stack at the end of the physical control page */
16447
lea PAGE_SIZE(%r8), %rsp
@@ -194,9 +77,7 @@ identity_mapped:
19477
jmp 1f
19578
1:
19679

197-
/* Switch to the identity mapped page tables,
198-
* and flush the TLB.
199-
*/
80+
/* Flush the TLB (needed?) */
20081
movq %rcx, %cr3
20182

20283
/* Do the copies */

0 commit comments

Comments
 (0)