Skip to content

Commit f08fb25

Browse files
npiggin authored and mpe committed
powerpc/64s: Fix unrecoverable MCE calling async handler from NMI
The machine check handler is not considered NMI on 64s. The early handler is the true NMI handler, and then it schedules the machine_check_exception handler to run when interrupts are enabled.

This works fine except the case of an unrecoverable MCE, where the true NMI is taken when MSR[RI] is clear, it can not recover, so it calls machine_check_exception directly so something might be done about it.

Calling an async handler from NMI context can result in irq state and other things getting corrupted. This can also trigger the BUG at
  arch/powerpc/include/asm/interrupt.h:168
  BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));

Fix this by making an _async version of the handler which is called in the normal case, and a NMI version that is called for unrecoverable interrupts.

Fixes: 2b43dd7 ("powerpc/64: enable MSR[EE] in irq replay pt_regs")
Signed-off-by: Nicholas Piggin <[email protected]>
Tested-by: Cédric Le Goater <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 768c470 commit f08fb25

File tree

3 files changed

+26
-18
lines changed

3 files changed

+26
-18
lines changed

arch/powerpc/include/asm/interrupt.h

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -528,10 +528,9 @@ static __always_inline long ____##func(struct pt_regs *regs)
528528
/* kernel/traps.c */
529529
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
530530
#ifdef CONFIG_PPC_BOOK3S_64
531-
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception);
532-
#else
533-
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
531+
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
534532
#endif
533+
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
535534
DECLARE_INTERRUPT_HANDLER(SMIException);
536535
DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
537536
DECLARE_INTERRUPT_HANDLER(unknown_exception);

arch/powerpc/kernel/exceptions-64s.S

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1243,7 +1243,7 @@ EXC_COMMON_BEGIN(machine_check_common)
12431243
li r10,MSR_RI
12441244
mtmsrd r10,1
12451245
addi r3,r1,STACK_FRAME_OVERHEAD
1246-
bl machine_check_exception
1246+
bl machine_check_exception_async
12471247
b interrupt_return_srr
12481248

12491249

@@ -1303,7 +1303,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
13031303
subi r12,r12,1
13041304
sth r12,PACA_IN_MCE(r13)
13051305

1306-
/* Invoke machine_check_exception to print MCE event and panic. */
1306+
/*
1307+
* Invoke machine_check_exception to print MCE event and panic.
1308+
* This is the NMI version of the handler because we are called from
1309+
* the early handler which is a true NMI.
1310+
*/
13071311
addi r3,r1,STACK_FRAME_OVERHEAD
13081312
bl machine_check_exception
13091313

arch/powerpc/kernel/traps.c

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -796,24 +796,22 @@ void die_mce(const char *str, struct pt_regs *regs, long err)
796796
* do_exit() checks for in_interrupt() and panics in that case, so
797797
* exit the irq/nmi before calling die.
798798
*/
799-
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
800-
irq_exit();
801-
else
799+
if (in_nmi())
802800
nmi_exit();
801+
else
802+
irq_exit();
803803
die(str, regs, err);
804804
}
805805

806806
/*
807-
* BOOK3S_64 does not call this handler as a non-maskable interrupt
807+
* BOOK3S_64 does not usually call this handler as a non-maskable interrupt
808808
* (it uses its own early real-mode handler to handle the MCE proper
809809
* and then raises irq_work to call this handler when interrupts are
810-
* enabled).
810+
* enabled). The only time when this is not true is if the early handler
811+
* is unrecoverable, then it does call this directly to try to get a
812+
* message out.
811813
*/
812-
#ifdef CONFIG_PPC_BOOK3S_64
813-
DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
814-
#else
815-
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
816-
#endif
814+
static void __machine_check_exception(struct pt_regs *regs)
817815
{
818816
int recover = 0;
819817

@@ -847,12 +845,19 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
847845
/* Must die if the interrupt is not recoverable */
848846
if (regs_is_unrecoverable(regs))
849847
die_mce("Unrecoverable Machine check", regs, SIGBUS);
848+
}
850849

851850
#ifdef CONFIG_PPC_BOOK3S_64
852-
return;
853-
#else
854-
return 0;
851+
DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
852+
{
853+
__machine_check_exception(regs);
854+
}
855855
#endif
856+
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
857+
{
858+
__machine_check_exception(regs);
859+
860+
return 0;
856861
}
857862

858863
DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */

0 commit comments

Comments
 (0)