Skip to content

Commit e8c7106

Browse files
Matt Fleming authored and Ingo Molnar committed
x86, efi: Calling __pa() with an ioremap()ed address is invalid
If we encounter an efi_memory_desc_t without EFI_MEMORY_WB set in
->attribute we currently call set_memory_uc(), which in turn calls
__pa() on a potentially ioremap'd address.

On CONFIG_X86_32 this is invalid, resulting in the following oops on
some machines:

  BUG: unable to handle kernel paging request at f7f22280
  IP: [<c10257b9>] reserve_ram_pages_type+0x89/0x210
  [...]

  Call Trace:
   [<c104f8ca>] ? page_is_ram+0x1a/0x40
   [<c1025aff>] reserve_memtype+0xdf/0x2f0
   [<c1024dc9>] set_memory_uc+0x49/0xa0
   [<c19334d0>] efi_enter_virtual_mode+0x1c2/0x3aa
   [<c19216d4>] start_kernel+0x291/0x2f2
   [<c19211c7>] ? loglevel+0x1b/0x1b
   [<c19210bf>] i386_start_kernel+0xbf/0xc8

A better approach to this problem is to map the memory region with the
correct attributes from the start, instead of modifying it after the
fact. The uncached case can be handled by ioremap_nocache() and the
cached by ioremap_cache().

Despite first impressions, it's not possible to use ioremap_cache() to
map all cached memory regions on CONFIG_X86_64 because
EFI_RUNTIME_SERVICES_DATA regions really don't like being mapped into
the vmalloc space, as detailed in the following bug report,

  https://bugzilla.redhat.com/show_bug.cgi?id=748516

Therefore, we need to ensure that any EFI_RUNTIME_SERVICES_DATA regions
are covered by the direct kernel mapping table on CONFIG_X86_64. To
accomplish this we now map E820_RESERVED_EFI regions via the direct
kernel mapping with the initial call to init_memory_mapping() in
setup_arch(), whereas previously these regions wouldn't be mapped if
they were after the last E820_RAM region until efi_ioremap() was
called. Doing it this way allows us to delete efi_ioremap() completely.

Signed-off-by: Matt Fleming <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Matthew Garrett <[email protected]>
Cc: Zhang Rui <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 2ded6e6 commit e8c7106

File tree

6 files changed

+48
-35
lines changed

6 files changed

+48
-35
lines changed

arch/x86/include/asm/e820.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,13 @@
5353
*/
5454
#define E820_RESERVED_KERN 128
5555

56+
/*
57+
* Address ranges that need to be mapped by the kernel direct
58+
* mapping. This is used to make sure regions such as
59+
* EFI_RUNTIME_SERVICES_DATA are directly mapped. See setup_arch().
60+
*/
61+
#define E820_RESERVED_EFI 129
62+
5663
#ifndef __ASSEMBLY__
5764
#include <linux/types.h>
5865
struct e820entry {
@@ -115,6 +122,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
115122
}
116123
#endif
117124

125+
extern unsigned long e820_end_pfn(unsigned long limit_pfn, unsigned type);
118126
extern unsigned long e820_end_of_ram_pfn(void);
119127
extern unsigned long e820_end_of_low_ram_pfn(void);
120128
extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);

arch/x86/include/asm/efi.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
3333
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
3434
efi_call_virt(f, a1, a2, a3, a4, a5, a6)
3535

36-
#define efi_ioremap(addr, size, type) ioremap_cache(addr, size)
37-
3836
#else /* !CONFIG_X86_32 */
3937

4038
extern u64 efi_call0(void *fp);
@@ -84,9 +82,6 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
8482
efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
8583
(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
8684

87-
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
88-
u32 type);
89-
9085
#endif /* CONFIG_X86_32 */
9186

9287
extern int add_efi_memmap;

arch/x86/kernel/e820.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,7 @@ static void __init e820_print_type(u32 type)
135135
printk(KERN_CONT "(usable)");
136136
break;
137137
case E820_RESERVED:
138+
case E820_RESERVED_EFI:
138139
printk(KERN_CONT "(reserved)");
139140
break;
140141
case E820_ACPI:
@@ -783,7 +784,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
783784
/*
784785
* Find the highest page frame number we have available
785786
*/
786-
static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
787+
unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
787788
{
788789
int i;
789790
unsigned long last_pfn = 0;

arch/x86/kernel/setup.c

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -691,6 +691,8 @@ early_param("reservelow", parse_reservelow);
691691

692692
void __init setup_arch(char **cmdline_p)
693693
{
694+
unsigned long end_pfn;
695+
694696
#ifdef CONFIG_X86_32
695697
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
696698
visws_early_detect();
@@ -932,7 +934,24 @@ void __init setup_arch(char **cmdline_p)
932934
init_gbpages();
933935

934936
/* max_pfn_mapped is updated here */
935-
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
937+
end_pfn = max_low_pfn;
938+
939+
#ifdef CONFIG_X86_64
940+
/*
941+
* There may be regions after the last E820_RAM region that we
942+
* want to include in the kernel direct mapping, such as
943+
* EFI_RUNTIME_SERVICES_DATA.
944+
*/
945+
if (efi_enabled) {
946+
unsigned long efi_end;
947+
948+
efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI);
949+
if (efi_end > max_low_pfn)
950+
end_pfn = efi_end;
951+
}
952+
#endif
953+
954+
max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT);
936955
max_pfn_mapped = max_low_pfn_mapped;
937956

938957
#ifdef CONFIG_X86_64

arch/x86/platform/efi/efi.c

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -323,10 +323,13 @@ static void __init do_add_efi_memmap(void)
323323
case EFI_UNUSABLE_MEMORY:
324324
e820_type = E820_UNUSABLE;
325325
break;
326+
case EFI_RUNTIME_SERVICES_DATA:
327+
e820_type = E820_RESERVED_EFI;
328+
break;
326329
default:
327330
/*
328331
* EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
329-
* EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
332+
* EFI_MEMORY_MAPPED_IO
330333
* EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
331334
*/
332335
e820_type = E820_RESERVED;
@@ -671,10 +674,21 @@ void __init efi_enter_virtual_mode(void)
671674
end_pfn = PFN_UP(end);
672675
if (end_pfn <= max_low_pfn_mapped
673676
|| (end_pfn > (1UL << (32 - PAGE_SHIFT))
674-
&& end_pfn <= max_pfn_mapped))
677+
&& end_pfn <= max_pfn_mapped)) {
675678
va = __va(md->phys_addr);
676-
else
677-
va = efi_ioremap(md->phys_addr, size, md->type);
679+
680+
if (!(md->attribute & EFI_MEMORY_WB)) {
681+
addr = (u64) (unsigned long)va;
682+
npages = md->num_pages;
683+
memrange_efi_to_native(&addr, &npages);
684+
set_memory_uc(addr, npages);
685+
}
686+
} else {
687+
if (!(md->attribute & EFI_MEMORY_WB))
688+
va = ioremap_nocache(md->phys_addr, size);
689+
else
690+
va = ioremap_cache(md->phys_addr, size);
691+
}
678692

679693
md->virt_addr = (u64) (unsigned long) va;
680694

@@ -684,13 +698,6 @@ void __init efi_enter_virtual_mode(void)
684698
continue;
685699
}
686700

687-
if (!(md->attribute & EFI_MEMORY_WB)) {
688-
addr = md->virt_addr;
689-
npages = md->num_pages;
690-
memrange_efi_to_native(&addr, &npages);
691-
set_memory_uc(addr, npages);
692-
}
693-
694701
systab = (u64) (unsigned long) efi_phys.systab;
695702
if (md->phys_addr <= systab && systab < end) {
696703
systab += md->virt_addr - md->phys_addr;

arch/x86/platform/efi/efi_64.c

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -80,20 +80,3 @@ void __init efi_call_phys_epilog(void)
8080
local_irq_restore(efi_flags);
8181
early_code_mapping_set_exec(0);
8282
}
83-
84-
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
85-
u32 type)
86-
{
87-
unsigned long last_map_pfn;
88-
89-
if (type == EFI_MEMORY_MAPPED_IO)
90-
return ioremap(phys_addr, size);
91-
92-
last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
93-
if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
94-
unsigned long top = last_map_pfn << PAGE_SHIFT;
95-
efi_ioremap(top, size - (top - phys_addr), type);
96-
}
97-
98-
return (void __iomem *)__va(phys_addr);
99-
}

0 commit comments

Comments
 (0)