Skip to content

Commit 745e3ed

Browse files
kirylbp3tk0v
authored and committed
efi/libstub: Implement support for unaccepted memory
UEFI Specification version 2.9 introduces the concept of memory acceptance: Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP, require memory to be accepted before it can be used by the guest. Accepting happens via a protocol specific to the Virtual Machine platform. Accepting memory is costly and it makes the VMM allocate memory for the accepted guest physical address range. It's better to postpone memory acceptance until the memory is needed. It lowers boot time and reduces memory overhead. The kernel needs to know what memory has been accepted. Firmware communicates this information via the memory map: a new memory type -- EFI_UNACCEPTED_MEMORY -- indicates such memory. Range-based tracking works fine for firmware, but it gets bulky for the kernel: e820 (or whatever the arch uses) has to be modified on every page acceptance. It leads to table fragmentation and there's a limited number of entries in the e820 table. Another option is to mark such memory as usable in e820 and track whether the range has been accepted in a bitmap. One bit in the bitmap represents a naturally aligned power-of-2-sized region of address space -- a unit. For x86, the unit size is 2MiB: 4k of the bitmap is enough to track 64GiB of physical address space. In the worst-case scenario -- a huge hole in the middle of the address space -- it needs 256MiB to handle 4PiB of the address space. Any unaccepted memory that is not aligned to unit_size gets accepted upfront. The bitmap is allocated and constructed in the EFI stub and passed down to the kernel via an EFI configuration table. allocate_e820() allocates the bitmap if unaccepted memory is present, according to the size of the unaccepted region. Signed-off-by: Kirill A. Shutemov <[email protected]> Signed-off-by: Borislav Petkov (AMD) <[email protected]> Reviewed-by: Ard Biesheuvel <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 2e9f46e commit 745e3ed

File tree

12 files changed

+365
-1
lines changed

12 files changed

+365
-1
lines changed

arch/x86/boot/compressed/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ endif
107107

108108
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
109109
vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
110+
vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o
110111

111112
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
112113
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o

arch/x86/boot/compressed/mem.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
3+
#include "error.h"
4+
5+
/*
 * arch_accept_memory() - architecture hook that accepts [start, end).
 * @start: physical address of the first byte to accept
 * @end:   physical address one past the last byte to accept
 *
 * Placeholder: no platform-specific acceptance protocol is wired up here,
 * so any request is reported through error().
 * NOTE(review): error() comes from "error.h"; presumably it does not
 * return -- confirm against its definition.
 */
void arch_accept_memory(phys_addr_t start, phys_addr_t end)
{
	/* Platform-specific memory-acceptance call goes here */
	error("Cannot accept memory");
}

arch/x86/include/asm/efi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ extern unsigned long efi_mixed_mode_stack_pa;
3131

3232
#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
3333

34+
#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE
35+
3436
/*
3537
* The EFI services are called through variadic functions in many cases. These
3638
* functions are implemented in assembler and support only a fixed number of

drivers/firmware/efi/Kconfig

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,20 @@ config EFI_COCO_SECRET
269269
virt/coco/efi_secret module to access the secrets, which in turn
270270
allows userspace programs to access the injected secrets.
271271

272+
config UNACCEPTED_MEMORY
273+
bool
274+
depends on EFI_STUB
275+
help
276+
Some Virtual Machine platforms, such as Intel TDX, require
277+
some memory to be "accepted" by the guest before it can be used.
278+
This mechanism helps prevent malicious hosts from making changes
279+
to guest memory.
280+
281+
UEFI specification v2.9 introduced EFI_UNACCEPTED_MEMORY memory type.
282+
283+
This option adds support for unaccepted memory and makes such memory
284+
usable by the kernel.
285+
272286
config EFI_EMBEDDED_FIRMWARE
273287
bool
274288
select CRYPTO_LIB_SHA256

drivers/firmware/efi/efi.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,7 @@ static __initdata char memory_type_name[][13] = {
843843
"MMIO Port",
844844
"PAL Code",
845845
"Persistent",
846+
"Unaccepted",
846847
};
847848

848849
char * __init efi_md_typeattr_format(char *buf, size_t size,

drivers/firmware/efi/libstub/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
9696
zboot-obj-$(CONFIG_RISCV) := lib-clz_ctz.o lib-ashldi3.o
9797
lib-$(CONFIG_EFI_ZBOOT) += zboot.o $(zboot-obj-y)
9898

99+
lib-$(CONFIG_UNACCEPTED_MEMORY) += unaccepted_memory.o bitmap.o find.o
100+
99101
extra-y := $(lib-y)
100102
lib-y := $(patsubst %.o,%.stub.o,$(lib-y))
101103

drivers/firmware/efi/libstub/bitmap.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#include <linux/bitmap.h>
2+
3+
void __bitmap_set(unsigned long *map, unsigned int start, int len)
4+
{
5+
unsigned long *p = map + BIT_WORD(start);
6+
const unsigned int size = start + len;
7+
int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
8+
unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
9+
10+
while (len - bits_to_set >= 0) {
11+
*p |= mask_to_set;
12+
len -= bits_to_set;
13+
bits_to_set = BITS_PER_LONG;
14+
mask_to_set = ~0UL;
15+
p++;
16+
}
17+
if (len) {
18+
mask_to_set &= BITMAP_LAST_WORD_MASK(size);
19+
*p |= mask_to_set;
20+
}
21+
}
22+
23+
void __bitmap_clear(unsigned long *map, unsigned int start, int len)
24+
{
25+
unsigned long *p = map + BIT_WORD(start);
26+
const unsigned int size = start + len;
27+
int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
28+
unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
29+
30+
while (len - bits_to_clear >= 0) {
31+
*p &= ~mask_to_clear;
32+
len -= bits_to_clear;
33+
bits_to_clear = BITS_PER_LONG;
34+
mask_to_clear = ~0UL;
35+
p++;
36+
}
37+
if (len) {
38+
mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
39+
*p &= ~mask_to_clear;
40+
}
41+
}

drivers/firmware/efi/libstub/efistub.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,4 +1136,10 @@ void efi_remap_image(unsigned long image_base, unsigned alloc_size,
11361136
asmlinkage efi_status_t __efiapi
11371137
efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab);
11381138

1139+
efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
1140+
struct efi_boot_memmap *map);
1141+
void process_unaccepted_memory(u64 start, u64 end);
1142+
void accept_memory(phys_addr_t start, phys_addr_t end);
1143+
void arch_accept_memory(phys_addr_t start, phys_addr_t end);
1144+
11391145
#endif

drivers/firmware/efi/libstub/find.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
#include <linux/bitmap.h>
3+
#include <linux/math.h>
4+
#include <linux/minmax.h>
5+
6+
/*
 * Shared search loop for the find_next_bit() function family.
 * @FETCH: expression yielding the (pre-processed) bitmap word at index 'idx';
 *         'idx' is declared by this macro and is visible to the expression
 * @MUNGE: post-processing applied to the first-word mask and to the word
 *         holding the hit (may be empty)
 * @size:  bitmap size in bits
 * @start: bit number at which the search begins
 *
 * Evaluates to the index of the first matching bit at or after @start, or
 * to @size when there is none.
 */
#define FIND_NEXT_BIT(FETCH, MUNGE, size, start)				\
({										\
	unsigned long mask, idx, tmp, sz = (size), __start = (start);		\
										\
	if (!unlikely(__start >= sz)) {						\
		/* Ignore bits below __start in the first word. */		\
		mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start));			\
		idx = __start / BITS_PER_LONG;					\
		tmp = (FETCH) & mask;						\
										\
		/* Advance while the word is empty and another one exists. */	\
		while (!tmp && (idx + 1) * BITS_PER_LONG < sz) {		\
			idx++;							\
			tmp = (FETCH);						\
		}								\
										\
		/* Clamp: a hit in the last word's padding is not a hit. */	\
		if (tmp)							\
			sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz);	\
	}									\
										\
	sz;									\
})
33+
34+
unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start)
35+
{
36+
return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start);
37+
}
38+
39+
unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
40+
unsigned long start)
41+
{
42+
return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start);
43+
}
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
3+
#include <linux/efi.h>
4+
#include <asm/efi.h>
5+
#include "efistub.h"
6+
7+
struct efi_unaccepted_memory *unaccepted_table;
8+
9+
/*
 * allocate_unaccepted_bitmap() - set up the unaccepted-memory config table.
 * @nr_desc: number of descriptors in @map
 * @map:     EFI boot memory map to scan for EFI_UNACCEPTED_MEMORY entries
 *
 * If a table with LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID is already installed,
 * it is reused (only version 1 is understood). Otherwise, when the memory
 * map contains unaccepted memory, a zeroed bitmap covering the span from
 * the lowest to the highest unaccepted address is allocated and installed
 * as an EFI configuration table.
 *
 * On allocation or installation failure the global unaccepted_table is
 * reset to NULL so that it never refers to memory that is not ours.
 *
 * Return: EFI_SUCCESS if a usable table exists or none is needed,
 * EFI_UNSUPPORTED for an already-installed table of unknown version,
 * otherwise the status reported by the failing boot service.
 */
efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
					struct efi_boot_memmap *map)
{
	efi_guid_t unaccepted_table_guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
	u64 unaccepted_start = ULLONG_MAX, unaccepted_end = 0, bitmap_size;
	unsigned long m = (unsigned long)map->map;
	efi_status_t status;
	__u32 i;

	/* Check if the table is already installed */
	unaccepted_table = get_efi_config_table(unaccepted_table_guid);
	if (unaccepted_table) {
		if (unaccepted_table->version != 1) {
			efi_err("Unknown version of unaccepted memory table\n");
			return EFI_UNSUPPORTED;
		}
		return EFI_SUCCESS;
	}

	/* Check if there's any unaccepted memory and find the max address */
	for (i = 0; i < nr_desc; i++) {
		efi_memory_desc_t *d;

		d = efi_early_memdesc_ptr(m, map->desc_size, i);
		if (d->type != EFI_UNACCEPTED_MEMORY)
			continue;

		unaccepted_start = min(unaccepted_start, d->phys_addr);
		unaccepted_end = max(unaccepted_end,
				     d->phys_addr + d->num_pages * PAGE_SIZE);
	}

	/* Sentinel untouched: the map holds no unaccepted memory. */
	if (unaccepted_start == ULLONG_MAX)
		return EFI_SUCCESS;

	unaccepted_start = round_down(unaccepted_start,
				      EFI_UNACCEPTED_UNIT_SIZE);
	unaccepted_end = round_up(unaccepted_end, EFI_UNACCEPTED_UNIT_SIZE);

	/*
	 * If unaccepted memory is present, allocate a bitmap to track what
	 * memory has to be accepted before access.
	 *
	 * One bit in the bitmap represents 2MiB in the address space:
	 * A 4k bitmap can track 64GiB of physical address space.
	 *
	 * In the worst case scenario -- a huge hole in the middle of the
	 * address space -- it needs 256MiB to handle 4PiB of the address
	 * space.
	 *
	 * The bitmap will be populated in setup_e820() according to the memory
	 * map after efi_exit_boot_services().
	 */
	bitmap_size = DIV_ROUND_UP(unaccepted_end - unaccepted_start,
				   EFI_UNACCEPTED_UNIT_SIZE * BITS_PER_BYTE);

	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
			     sizeof(*unaccepted_table) + bitmap_size,
			     (void **)&unaccepted_table);
	if (status != EFI_SUCCESS) {
		/* Do not trust whatever the failed call left in the pointer. */
		unaccepted_table = NULL;
		efi_err("Failed to allocate unaccepted memory config table\n");
		return status;
	}

	unaccepted_table->version = 1;
	unaccepted_table->unit_size = EFI_UNACCEPTED_UNIT_SIZE;
	unaccepted_table->phys_base = unaccepted_start;
	unaccepted_table->size = bitmap_size;
	memset(unaccepted_table->bitmap, 0, bitmap_size);

	status = efi_bs_call(install_configuration_table,
			     &unaccepted_table_guid, unaccepted_table);
	if (status != EFI_SUCCESS) {
		efi_bs_call(free_pool, unaccepted_table);
		/* The buffer is gone: keep accept_memory()'s NULL check valid. */
		unaccepted_table = NULL;
		efi_err("Failed to install unaccepted memory config table!\n");
	}

	return status;
}
88+
89+
/*
90+
* The accepted memory bitmap only works at unit_size granularity. Take
91+
* unaligned start/end addresses and either:
92+
* 1. Accepts the memory immediately and in its entirety
93+
* 2. Accepts unaligned parts, and marks *some* aligned part unaccepted
94+
*
95+
* The function will never reach the bitmap_set() with zero bits to set.
96+
*/
97+
void process_unaccepted_memory(u64 start, u64 end)
98+
{
99+
u64 unit_size = unaccepted_table->unit_size;
100+
u64 unit_mask = unaccepted_table->unit_size - 1;
101+
u64 bitmap_size = unaccepted_table->size;
102+
103+
/*
104+
* Ensure that at least one bit will be set in the bitmap by
105+
* immediately accepting all regions under 2*unit_size. This is
106+
* imprecise and may immediately accept some areas that could
107+
* have been represented in the bitmap. But, results in simpler
108+
* code below
109+
*
110+
* Consider case like this (assuming unit_size == 2MB):
111+
*
112+
* | 4k | 2044k | 2048k |
113+
* ^ 0x0 ^ 2MB ^ 4MB
114+
*
115+
* Only the first 4k has been accepted. The 0MB->2MB region can not be
116+
* represented in the bitmap. The 2MB->4MB region can be represented in
117+
* the bitmap. But, the 0MB->4MB region is <2*unit_size and will be
118+
* immediately accepted in its entirety.
119+
*/
120+
if (end - start < 2 * unit_size) {
121+
arch_accept_memory(start, end);
122+
return;
123+
}
124+
125+
/*
126+
* No matter how the start and end are aligned, at least one unaccepted
127+
* unit_size area will remain to be marked in the bitmap.
128+
*/
129+
130+
/* Immediately accept a <unit_size piece at the start: */
131+
if (start & unit_mask) {
132+
arch_accept_memory(start, round_up(start, unit_size));
133+
start = round_up(start, unit_size);
134+
}
135+
136+
/* Immediately accept a <unit_size piece at the end: */
137+
if (end & unit_mask) {
138+
arch_accept_memory(round_down(end, unit_size), end);
139+
end = round_down(end, unit_size);
140+
}
141+
142+
/*
143+
* Accept part of the range that before phys_base and cannot be recorded
144+
* into the bitmap.
145+
*/
146+
if (start < unaccepted_table->phys_base) {
147+
arch_accept_memory(start,
148+
min(unaccepted_table->phys_base, end));
149+
start = unaccepted_table->phys_base;
150+
}
151+
152+
/* Nothing to record */
153+
if (end < unaccepted_table->phys_base)
154+
return;
155+
156+
/* Translate to offsets from the beginning of the bitmap */
157+
start -= unaccepted_table->phys_base;
158+
end -= unaccepted_table->phys_base;
159+
160+
/* Accept memory that doesn't fit into bitmap */
161+
if (end > bitmap_size * unit_size * BITS_PER_BYTE) {
162+
unsigned long phys_start, phys_end;
163+
164+
phys_start = bitmap_size * unit_size * BITS_PER_BYTE +
165+
unaccepted_table->phys_base;
166+
phys_end = end + unaccepted_table->phys_base;
167+
168+
arch_accept_memory(phys_start, phys_end);
169+
end = bitmap_size * unit_size * BITS_PER_BYTE;
170+
}
171+
172+
/*
173+
* 'start' and 'end' are now both unit_size-aligned.
174+
* Record the range as being unaccepted:
175+
*/
176+
bitmap_set(unaccepted_table->bitmap,
177+
start / unit_size, (end - start) / unit_size);
178+
}
179+
180+
/*
 * accept_memory() - accept every still-unaccepted unit overlapping a range.
 * @start: physical address of the first byte of the range
 * @end:   physical address one past the last byte of the range
 *
 * Does nothing when no unaccepted-memory table exists. Otherwise the range
 * is clipped to the window described by the bitmap, each run of set bits
 * inside it is handed to arch_accept_memory(), and those bits are cleared.
 */
void accept_memory(phys_addr_t start, phys_addr_t end)
{
	unsigned long first_unit, past_unit;
	unsigned long nr_units;
	u64 unit_size, tracked_bytes;

	if (!unaccepted_table)
		return;

	/* A range ending below the tracked window has nothing recorded. */
	if (end < unaccepted_table->phys_base)
		return;

	/* Only the part at or above phys_base is represented in the bitmap. */
	if (start < unaccepted_table->phys_base)
		start = unaccepted_table->phys_base;

	unit_size = unaccepted_table->unit_size;
	tracked_bytes = unaccepted_table->size * unit_size * BITS_PER_BYTE;

	/* From here on, work with offsets relative to the bitmap's base. */
	start -= unaccepted_table->phys_base;
	end -= unaccepted_table->phys_base;

	/* Never look past the last bit the bitmap actually holds. */
	if (end > tracked_bytes)
		end = tracked_bytes;

	first_unit = start / unit_size;
	nr_units = DIV_ROUND_UP(end, unit_size);

	/* Walk each run of set (i.e. unaccepted) bits within the window. */
	for_each_set_bitrange_from(first_unit, past_unit,
				   unaccepted_table->bitmap, nr_units) {
		unsigned long phys_start, phys_end;

		phys_start = first_unit * unit_size +
			     unaccepted_table->phys_base;
		phys_end = past_unit * unit_size +
			   unaccepted_table->phys_base;

		arch_accept_memory(phys_start, phys_end);
		bitmap_clear(unaccepted_table->bitmap,
			     first_unit, past_unit - first_unit);
	}
}

0 commit comments

Comments
 (0)