Skip to content

Commit 2fb5853

Browse files
caf-zjzhang authored and wildea01 committed
acpi: apei: panic OS with fatal error status block
Even if an error status block's severity is fatal, the kernel does not honor the severity level and does not panic. With the firmware-first model, the platform can inform the OS about a fatal hardware error through a non-NMI GHES notification type. The OS should panic when a hardware error record is received with this severity. Call panic() after the CPER data in the error status block is printed if the severity is fatal, before each error section is handled. Signed-off-by: Jonathan (Zhixiong) Zhang <[email protected]> Signed-off-by: Tyler Baicar <[email protected]> Reviewed-by: James Morse <[email protected]> Signed-off-by: Will Deacon <[email protected]>
1 parent 7edda08 commit 2fb5853

File tree

1 file changed

+21
-15
lines changed

1 file changed

+21
-15
lines changed

drivers/acpi/apei/ghes.c

Lines changed: 21 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,8 @@ static unsigned long ghes_estatus_pool_size_request;
135135
static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
136136
static atomic_t ghes_estatus_cache_alloced;
137137

138+
static int ghes_panic_timeout __read_mostly = 30;
139+
138140
static int ghes_ioremap_init(void)
139141
{
140142
ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -691,13 +693,28 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
691693
return apei_write(val, &gv2->read_ack_register);
692694
}
693695

696+
static void __ghes_panic(struct ghes *ghes)
697+
{
698+
__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
699+
700+
/* reboot to log the error! */
701+
if (!panic_timeout)
702+
panic_timeout = ghes_panic_timeout;
703+
panic("Fatal hardware error!");
704+
}
705+
694706
static int ghes_proc(struct ghes *ghes)
695707
{
696708
int rc;
697709

698710
rc = ghes_read_estatus(ghes, 0);
699711
if (rc)
700712
goto out;
713+
714+
if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
715+
__ghes_panic(ghes);
716+
}
717+
701718
if (!ghes_estatus_cached(ghes->estatus)) {
702719
if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
703720
ghes_estatus_cache_add(ghes->generic, ghes->estatus);
@@ -838,8 +855,6 @@ static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
838855

839856
static LIST_HEAD(ghes_nmi);
840857

841-
static int ghes_panic_timeout __read_mostly = 30;
842-
843858
static void ghes_proc_in_irq(struct irq_work *irq_work)
844859
{
845860
struct llist_node *llnode, *next;
@@ -925,18 +940,6 @@ static void __process_error(struct ghes *ghes)
925940
#endif
926941
}
927942

928-
static void __ghes_panic(struct ghes *ghes)
929-
{
930-
oops_begin();
931-
ghes_print_queued_estatus();
932-
__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
933-
934-
/* reboot to log the error! */
935-
if (panic_timeout == 0)
936-
panic_timeout = ghes_panic_timeout;
937-
panic("Fatal hardware error!");
938-
}
939-
940943
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
941944
{
942945
struct ghes *ghes;
@@ -954,8 +957,11 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
954957
}
955958

956959
sev = ghes_severity(ghes->estatus->error_severity);
957-
if (sev >= GHES_SEV_PANIC)
960+
if (sev >= GHES_SEV_PANIC) {
961+
oops_begin();
962+
ghes_print_queued_estatus();
958963
__ghes_panic(ghes);
964+
}
959965

960966
if (!(ghes->flags & GHES_TO_CLEAR))
961967
continue;

0 commit comments

Comments
 (0)