
Commit fd0e786

aegl authored and Ingo Molnar committed
x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages
In the following commit:

  ce0fa3e ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")

... we added code to memory_failure() to unmap the page from the kernel
1:1 virtual address space to avoid speculative access to the page
logging additional errors.

But memory_failure() may not always succeed in taking the page offline,
especially if the page belongs to the kernel. This can happen if there
are too many corrected errors on a page and either mcelog(8) or
drivers/ras/cec.c asks to take a page offline.

Since we remove the 1:1 mapping early in memory_failure(), we can end up
with the page unmapped, but still in use. On the next access the kernel
crashes :-(

There are also various debug paths that call memory_failure() to
simulate occurrence of an error. Since there is no actual error in
memory, we don't need to map out the page for those cases.

Revert most of the previous attempt and keep the solution local to
arch/x86/kernel/cpu/mcheck/mce.c. Unmap the page only when:

	1) there is a real error
	2) memory_failure() succeeds.

All of this only applies to 64-bit systems. 32-bit kernel doesn't map
all of memory into kernel space. It isn't worth adding the code to unmap
the piece that is mapped because nobody would run a 32-bit kernel on a
machine that has recoverable machine checks.

Signed-off-by: Tony Luck <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Dave <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Robert (Persistent Memory) <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected] #v4.14
Fixes: ce0fa3e ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")
Signed-off-by: Ingo Molnar <[email protected]>
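For illustration, the ordering the commit settles on can be condensed into a small userspace C sketch (a model only, not kernel code): attempt to take the page offline first, and touch the kernel 1:1 mapping only if memory_failure() reported success. The names memory_failure_stub(), unmap_kpfn_stub() and handle_real_error() are hypothetical stand-ins for memory_failure(), mce_unmap_kpfn() and the handler paths changed below.

/*
 * Userspace model of the recovery ordering described in the commit
 * message: offline the page first, unmap the 1:1 mapping only on success.
 * All functions here are stand-ins, not kernel APIs.
 */
#include <stdio.h>

/* Stand-in for memory_failure(): 0 = page taken offline, non-zero = failure. */
static int memory_failure_stub(unsigned long pfn, int flags)
{
	(void)flags;
	return (pfn & 1) ? -1 : 0;	/* fail odd pfns, purely for the demo */
}

/* Stand-in for mce_unmap_kpfn(): pretend to clear PRESENT on the 1:1 map. */
static void unmap_kpfn_stub(unsigned long pfn)
{
	printf("pfn 0x%lx: removed from kernel 1:1 map\n", pfn);
}

/* Only a real, successfully handled error leads to an unmap. */
static void handle_real_error(unsigned long pfn)
{
	if (!memory_failure_stub(pfn, 0))
		unmap_kpfn_stub(pfn);
	else
		printf("pfn 0x%lx: not recovered, 1:1 mapping left intact\n", pfn);
}

int main(void)
{
	handle_real_error(0x1234);	/* recovered: unmapped */
	handle_real_error(0x1235);	/* still in use: left mapped, no later crash */
	return 0;
}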
1 parent 01684e7 commit fd0e786

5 files changed (+26, -18 lines)

arch/x86/include/asm/page_64.h

Lines changed: 0 additions & 4 deletions
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION

arch/x86/kernel/cpu/mcheck/mce-internal.h

Lines changed: 15 additions & 0 deletions
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
 
 extern struct mca_config mca_cfg;
 
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ *    are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ *    recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
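The stub-plus-macro arrangement added above can be exercised on its own. The sketch below is only an illustration of the pattern, with a made-up CONFIG_DEMO_32BIT switch standing in for !CONFIG_X86_64: the header side supplies an empty inline plus a same-named macro, and the .c side compiles its real body only inside #ifndef mce_unmap_kpfn.

/*
 * Standalone illustration of the header-override pattern (not kernel code).
 * Build with -DCONFIG_DEMO_32BIT to get the empty stub, without it to get
 * the "real" implementation.
 */
#include <stdio.h>

/* What the header (mce-internal.h) would provide on the stubbed config. */
#ifdef CONFIG_DEMO_32BIT
static inline void mce_unmap_kpfn(unsigned long pfn) { (void)pfn; }
#define mce_unmap_kpfn mce_unmap_kpfn
#endif

/* What the .c file (mce.c) provides only when the header did not. */
#ifndef mce_unmap_kpfn
static void mce_unmap_kpfn(unsigned long pfn)
{
	printf("really unmapping pfn 0x%lx\n", pfn);
}
#endif

int main(void)
{
	mce_unmap_kpfn(0x42);	/* no-op under CONFIG_DEMO_32BIT */
	return 0;
}

Building both ways mirrors the 64-bit/32-bit split in this patch: the call sites stay identical and only the definition behind them changes.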

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 11 additions & 6 deletions
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 
 	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
 		pfn = mce->addr >> PAGE_SHIFT;
-		memory_failure(pfn, 0);
+		if (!memory_failure(pfn, 0))
+			mce_unmap_kpfn(pfn);
 	}
 
 	return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
 	ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
 	if (ret)
 		pr_err("Memory error not recovered");
+	else
+		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
 	return ret;
 }
 
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
 {
 	unsigned long decoy_addr;
 
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
 	 * We would like to just call:
 	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
 	 * but doing that would radically increase the odds of a
-	 * speculative access to the posion page because we'd have
+	 * speculative access to the poison page because we'd have
 	 * the virtual address of the kernel 1:1 mapping sitting
 	 * around in registers.
	 * Instead we get tricky. We create a non-canonical address
@@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
 
 	if (set_memory_np(decoy_addr, 1))
 		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
 }
 #endif
 
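The decoy_addr trick referred to in the comment above can also be sketched in isolation. The arithmetic below is a userspace illustration assuming the construction introduced by ce0fa3e (form the 1:1 address with bit 63 flipped, so the canonical pointer never sits in a register); PAGE_OFFSET_DEMO, PAGE_SHIFT_DEMO and the pfn are example values, not taken from this commit.

/*
 * Userspace sketch of building a non-canonical "decoy" for a 1:1 mapping
 * address by flipping bit 63 (illustration only; constants are assumed).
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT_DEMO		12
#define PAGE_OFFSET_DEMO	0xffff880000000000ULL	/* assumed 1:1 map base */
#define BIT63			(1ULL << 63)

int main(void)
{
	uint64_t pfn = 0x123456;	/* example page frame number */

	/* The address we must never keep around in canonical form. */
	uint64_t direct_map_addr = PAGE_OFFSET_DEMO + (pfn << PAGE_SHIFT_DEMO);

	/* Same bits except bit 63: non-canonical, never dereferenceable. */
	uint64_t decoy_addr = (pfn << PAGE_SHIFT_DEMO) + (PAGE_OFFSET_DEMO ^ BIT63);

	printf("1:1 address:         0x%016llx\n", (unsigned long long)direct_map_addr);
	printf("decoy address:       0x%016llx\n", (unsigned long long)decoy_addr);
	printf("bit 63 flipped back: 0x%016llx\n",
	       (unsigned long long)(decoy_addr ^ BIT63));
	return 0;
}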
include/linux/mm_inline.h

Lines changed: 0 additions & 6 deletions
@@ -127,10 +127,4 @@ static __always_inline enum lru_list page_lru(struct page *page)
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
 #endif

mm/memory-failure.c

Lines changed: 0 additions & 2 deletions
@@ -1139,8 +1139,6 @@ int memory_failure(unsigned long pfn, int flags)
 		return 0;
 	}
 
-	arch_unmap_kpfn(pfn);
-
 	orig_head = hpage = compound_head(p);
 	num_poisoned_pages_inc();
 
