Skip to content

Commit 05c6257

Browse files
valschneider authored and akpm00 committed
panic, kexec: make __crash_kexec() NMI safe
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI
panic() doesn't work. The cause of that lies in the PREEMPT_RT definition
of mutex_trylock():

    if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
        return 0;

This prevents an nmi_panic() from executing the main body of
__crash_kexec() which does the actual kexec into the kdump kernel. The
warning and return are explained by:

  6ce47fd ("rtmutex: Warn if trylock is called from hard/softirq context")
  [...]
  The reasons for this are:

      1) There is a potential deadlock in the slowpath

      2) Another cpu which blocks on the rtmutex will boost the task
         which allegedly locked the rtmutex, but that cannot work
         because the hard/softirq context borrows the task context.

Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex
and replace it with an atomic variable. This is somewhat overzealous as
*some* callsites could keep using a mutex (e.g. the sysfs-facing ones
like crash_shrink_memory()), but this has the benefit of involving a
single unified lock and preventing any future NMI-related surprises.

Tested by triggering NMI panics via:

  $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
  $ echo 1 > /proc/sys/kernel/unknown_nmi_panic
  $ echo 1 > /proc/sys/kernel/panic

  $ ipmitool power diag

Link: https://lkml.kernel.org/r/[email protected]
Fixes: 6ce47fd ("rtmutex: Warn if trylock is called from hard/softirq context")
Signed-off-by: Valentin Schneider <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Baoquan He <[email protected]>
Cc: "Eric W . Biederman" <[email protected]>
Cc: Juri Lelli <[email protected]>
Cc: Luis Claudio R. Goncalves <[email protected]>
Cc: Miaohe Lin <[email protected]>
Cc: Petr Mladek <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 7bb5da0 commit 05c6257

File tree

4 files changed

+30
-20
lines changed

4 files changed

+30
-20
lines changed

kernel/kexec.c

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
9393

9494
/*
9595
* Because we write directly to the reserved memory region when loading
96-
* crash kernels we need a mutex here to prevent multiple crash kernels
97-
* from attempting to load simultaneously, and to prevent a crash kernel
98-
* from loading over the top of a in use crash kernel.
99-
*
100-
* KISS: always take the mutex.
96+
* crash kernels we need a serialization here to prevent multiple crash
97+
* kernels from attempting to load simultaneously.
10198
*/
102-
if (!mutex_trylock(&kexec_mutex))
99+
if (!kexec_trylock())
103100
return -EBUSY;
104101

105102
if (flags & KEXEC_ON_CRASH) {
@@ -165,7 +162,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
165162

166163
kimage_free(image);
167164
out_unlock:
168-
mutex_unlock(&kexec_mutex);
165+
kexec_unlock();
169166
return ret;
170167
}
171168

kernel/kexec_core.c

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@
4646
#include <crypto/hash.h>
4747
#include "kexec_internal.h"
4848

49-
DEFINE_MUTEX(kexec_mutex);
49+
atomic_t __kexec_lock = ATOMIC_INIT(0);
5050

5151
/* Per cpu memory for storing cpu states in case of system crash. */
5252
note_buf_t __percpu *crash_notes;
@@ -959,15 +959,15 @@ late_initcall(kexec_core_sysctl_init);
959959
*/
960960
void __noclone __crash_kexec(struct pt_regs *regs)
961961
{
962-
/* Take the kexec_mutex here to prevent sys_kexec_load
962+
/* Take the kexec_lock here to prevent sys_kexec_load
963963
* running on one cpu from replacing the crash kernel
964964
* we are using after a panic on a different cpu.
965965
*
966966
* If the crash kernel was not located in a fixed area
967967
* of memory the xchg(&kexec_crash_image) would be
968968
* sufficient. But since I reuse the memory...
969969
*/
970-
if (mutex_trylock(&kexec_mutex)) {
970+
if (kexec_trylock()) {
971971
if (kexec_crash_image) {
972972
struct pt_regs fixed_regs;
973973

@@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
976976
machine_crash_shutdown(&fixed_regs);
977977
machine_kexec(kexec_crash_image);
978978
}
979-
mutex_unlock(&kexec_mutex);
979+
kexec_unlock();
980980
}
981981
}
982982
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -1008,13 +1008,13 @@ ssize_t crash_get_memory_size(void)
10081008
{
10091009
ssize_t size = 0;
10101010

1011-
if (!mutex_trylock(&kexec_mutex))
1011+
if (!kexec_trylock())
10121012
return -EBUSY;
10131013

10141014
if (crashk_res.end != crashk_res.start)
10151015
size = resource_size(&crashk_res);
10161016

1017-
mutex_unlock(&kexec_mutex);
1017+
kexec_unlock();
10181018
return size;
10191019
}
10201020

@@ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size)
10251025
unsigned long old_size;
10261026
struct resource *ram_res;
10271027

1028-
if (!mutex_trylock(&kexec_mutex))
1028+
if (!kexec_trylock())
10291029
return -EBUSY;
10301030

10311031
if (kexec_crash_image) {
@@ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size)
10641064
insert_resource(&iomem_resource, ram_res);
10651065

10661066
unlock:
1067-
mutex_unlock(&kexec_mutex);
1067+
kexec_unlock();
10681068
return ret;
10691069
}
10701070

@@ -1136,7 +1136,7 @@ int kernel_kexec(void)
11361136
{
11371137
int error = 0;
11381138

1139-
if (!mutex_trylock(&kexec_mutex))
1139+
if (!kexec_trylock())
11401140
return -EBUSY;
11411141
if (!kexec_image) {
11421142
error = -EINVAL;
@@ -1212,6 +1212,6 @@ int kernel_kexec(void)
12121212
#endif
12131213

12141214
Unlock:
1215-
mutex_unlock(&kexec_mutex);
1215+
kexec_unlock();
12161216
return error;
12171217
}

kernel/kexec_file.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -339,7 +339,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
339339

340340
image = NULL;
341341

342-
if (!mutex_trylock(&kexec_mutex))
342+
if (!kexec_trylock())
343343
return -EBUSY;
344344

345345
dest_image = &kexec_image;
@@ -411,7 +411,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
411411
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
412412
arch_kexec_protect_crashkres();
413413

414-
mutex_unlock(&kexec_mutex);
414+
kexec_unlock();
415415
kimage_free(image);
416416
return ret;
417417
}

kernel/kexec_internal.h

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image);
1313
int kimage_is_destination_range(struct kimage *image,
1414
unsigned long start, unsigned long end);
1515

16-
extern struct mutex kexec_mutex;
16+
/*
17+
* Whatever is used to serialize accesses to the kexec_crash_image needs to be
18+
* NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
19+
* "simple" atomic variable that is acquired with a cmpxchg().
20+
*/
21+
extern atomic_t __kexec_lock;
22+
static inline bool kexec_trylock(void)
23+
{
24+
return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
25+
}
26+
static inline void kexec_unlock(void)
27+
{
28+
atomic_set_release(&__kexec_lock, 0);
29+
}
1730

1831
#ifdef CONFIG_KEXEC_FILE
1932
#include <linux/purgatory.h>

0 commit comments

Comments
 (0)