Skip to content

Commit ed7290d

Browse files
Andi Kleen authored and H. Peter Anvin committed
x86, mce: implement new status bits
The x86 architecture recently added some new machine check status bits: S(ignalled) and AR (Action-Required). Signalled allows to check if a specific event caused an exception or was just logged through CMCI. AR allows the kernel to decide if an event needs immediate action or can be delayed or ignored. Implement support for these new status bits. mce_severity() uses the new bits to grade the machine check correctly and decide what to do. The exception handler uses AR to decide to kill or not. The S bit is used to separate events between the poll/CMCI handler and the exception handler. Classical UC always leads to panic. That was true before anyways because the existing CPUs always passed a PCC with it. Also corrects the rules whether to kill in user or kernel context and how to handle missing RIPV. The machine check handler largely uses the mce-severity grading engine now instead of making its own decisions. This means the logic is centralized in one place. This is useful because it has to be evaluated multiple times. v2: Some rule fixes; Add AO events Fix RIPV, RIPV|EIPV order (Ying Huang) Fix UCNA with AR=1 message (Ying Huang) Add comment about panicing in m_c_p. Signed-off-by: Andi Kleen <[email protected]> Signed-off-by: Hidetoshi Seto <[email protected]> Signed-off-by: H. Peter Anvin <[email protected]>
1 parent 8650356 commit ed7290d

File tree

4 files changed

+137
-44
lines changed

4 files changed

+137
-44
lines changed

arch/x86/include/asm/mce.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
1616
#define MCG_EXT_CNT_SHIFT 16
1717
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
18+
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
1819

1920
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
2021
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
@@ -27,6 +28,15 @@
2728
#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
2829
#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
2930
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
31+
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
32+
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
33+
34+
/* MISC register defines */
35+
#define MCM_ADDR_SEGOFF 0 /* segment offset */
36+
#define MCM_ADDR_LINEAR 1 /* linear address */
37+
#define MCM_ADDR_PHYS 2 /* physical address */
38+
#define MCM_ADDR_MEM 3 /* memory address */
39+
#define MCM_ADDR_GENERIC 7 /* generic */
3040

3141
/* Fields are zero when not available */
3242
struct mce {

arch/x86/kernel/cpu/mcheck/mce-internal.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,14 @@
22

33
enum severity_level {
44
MCE_NO_SEVERITY,
5+
MCE_KEEP_SEVERITY,
56
MCE_SOME_SEVERITY,
7+
MCE_AO_SEVERITY,
68
MCE_UC_SEVERITY,
9+
MCE_AR_SEVERITY,
710
MCE_PANIC_SEVERITY,
811
};
912

1013
int mce_severity(struct mce *a, int tolerant, char **msg);
14+
15+
extern int mce_ser;

arch/x86/kernel/cpu/mcheck/mce-severity.c

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,43 +19,117 @@
1919
* first. Since there are quite a lot of combinations test the bits in a
2020
* table-driven way. The rules are simply processed in order, first
2121
* match wins.
22+
*
23+
* Note this is only used for machine check exceptions, the corrected
24+
* errors use much simpler rules. The exceptions still check for the corrected
25+
* errors, but only to leave them alone for the CMCI handler (except for
26+
* panic situations)
2227
*/
2328

29+
enum context { IN_KERNEL = 1, IN_USER = 2 };
30+
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
31+
2432
static struct severity {
2533
u64 mask;
2634
u64 result;
2735
unsigned char sev;
2836
unsigned char mcgmask;
2937
unsigned char mcgres;
38+
unsigned char ser;
39+
unsigned char context;
3040
char *msg;
3141
} severities[] = {
42+
#define KERNEL .context = IN_KERNEL
43+
#define USER .context = IN_USER
44+
#define SER .ser = SER_REQUIRED
45+
#define NOSER .ser = NO_SER
3246
#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
3347
#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
3448
#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
3549
#define MCGMASK(x, res, s, m, r...) \
3650
{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
51+
#define MASK(x, y, s, m, r...) \
52+
{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
53+
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
54+
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
55+
#define MCACOD 0xffff
56+
3757
BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
3858
BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
3959
BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
40-
MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"),
60+
/* When MCIP is not set something is very confused */
61+
MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
62+
/* Neither restart nor error IP -- no chance to recover -> PANIC */
63+
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
64+
"Neither restart nor error IP"),
65+
MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
66+
KERNEL),
67+
BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
68+
MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
69+
"Spurious not enabled", SER),
70+
71+
/* ignore OVER for UCNA */
72+
MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
73+
"Uncorrected no action required", SER),
74+
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
75+
"Illegal combination (UCNA with AR=1)", SER),
76+
MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
77+
78+
/* AR: add known MCACODs here */
79+
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
80+
"Action required with lost events", SER),
81+
MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
82+
"Action required; unknown MCACOD", SER),
83+
84+
/* known AO MCACODs: */
85+
MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
86+
"Action optional: memory scrubbing error", SER),
87+
MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
88+
"Action optional: last level cache writeback error", SER),
89+
90+
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
91+
"Action optional unknown MCACOD", SER),
92+
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
93+
"Action optional with lost events", SER),
4194
BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
4295
BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
4396
BITSET(0, SOME, "No match") /* always matches. keep at end */
4497
};
4598

99+
/*
100+
* If the EIPV bit is set, it means the saved IP is the
101+
* instruction which caused the MCE.
102+
*/
103+
static int error_context(struct mce *m)
104+
{
105+
if (m->mcgstatus & MCG_STATUS_EIPV)
106+
return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
107+
/* Unknown, assume kernel */
108+
return IN_KERNEL;
109+
}
110+
46111
int mce_severity(struct mce *a, int tolerant, char **msg)
47112
{
113+
enum context ctx = error_context(a);
48114
struct severity *s;
115+
49116
for (s = severities;; s++) {
50117
if ((a->status & s->mask) != s->result)
51118
continue;
52119
if ((a->mcgstatus & s->mcgmask) != s->mcgres)
53120
continue;
54-
if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) &&
55-
tolerant < 1)
56-
return MCE_PANIC_SEVERITY;
121+
if (s->ser == SER_REQUIRED && !mce_ser)
122+
continue;
123+
if (s->ser == NO_SER && mce_ser)
124+
continue;
125+
if (s->context && ctx != s->context)
126+
continue;
57127
if (msg)
58128
*msg = s->msg;
129+
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
130+
if (panic_on_oops || tolerant < 1)
131+
return MCE_PANIC_SEVERITY;
132+
}
59133
return s->sev;
60134
}
61135
}

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 44 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ static int rip_msr;
8383
static int mce_bootlog = -1;
8484
static int monarch_timeout = -1;
8585
static int mce_panic_timeout;
86+
int mce_ser;
8687

8788
static char trigger[128];
8889
static char *trigger_argv[2] = { trigger, NULL };
@@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
391392
* Those are just logged through /dev/mcelog.
392393
*
393394
* This is executed in standard interrupt context.
395+
*
396+
* Note: the spec recommends panicking for fatal unsignalled
397+
* errors here. However this would be quite problematic --
398+
* we would need to reimplement the Monarch handling and
399+
* it would mess up the exclusion between exception handler
400+
* and poll handler -- so we skip this for now.
401+
* These cases should not happen anyways, or only when the CPU
402+
* is already totally confused. In this case it's likely it will
403+
* not fully execute the machine check handler either.
394404
*/
395405
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
396406
{
@@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
417427
continue;
418428

419429
/*
420-
* Uncorrected events are handled by the exception handler
421-
* when it is enabled. But when the exception is disabled log
422-
* everything.
430+
* Uncorrected or signalled events are handled by the exception
431+
* handler when it is enabled, so don't process those here.
423432
*
424433
* TBD do the same check for MCI_STATUS_EN here?
425434
*/
426-
if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
435+
if (!(flags & MCP_UC) &&
436+
(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
427437
continue;
428438

429439
if (m.status & MCI_STATUS_MISCV)
@@ -789,6 +799,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
789799

790800
barrier();
791801

802+
/*
803+
* Without a valid restart IP we must always kill or panic.
804+
*/
805+
if (!(m.mcgstatus & MCG_STATUS_RIPV))
806+
kill_it = 1;
807+
792808
/*
793809
* Go through all the banks in exclusion of the other CPUs.
794810
* This way we don't report duplicated events on shared banks
@@ -809,35 +825,41 @@ void do_machine_check(struct pt_regs *regs, long error_code)
809825
continue;
810826

811827
/*
812-
* Non uncorrected errors are handled by machine_check_poll
813-
* Leave them alone, unless this panics.
828+
* Non uncorrected or non signaled errors are handled by
829+
* machine_check_poll. Leave them alone, unless this panics.
814830
*/
815-
if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out)
831+
if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
832+
!no_way_out)
816833
continue;
817834

818835
/*
819836
* Set taint even when machine check was not enabled.
820837
*/
821838
add_taint(TAINT_MACHINE_CHECK);
822839

823-
__set_bit(i, toclear);
840+
severity = mce_severity(&m, tolerant, NULL);
824841

825-
if (m.status & MCI_STATUS_EN) {
826-
/*
827-
* If this error was uncorrectable and there was
828-
* an overflow, we're in trouble. If no overflow,
829-
* we might get away with just killing a task.
830-
*/
831-
if (m.status & MCI_STATUS_UC)
832-
kill_it = 1;
833-
} else {
842+
/*
843+
* When the machine check was for the corrected handler, don't touch it,
844+
* unless we're panicking.
845+
*/
846+
if (severity == MCE_KEEP_SEVERITY && !no_way_out)
847+
continue;
848+
__set_bit(i, toclear);
849+
if (severity == MCE_NO_SEVERITY) {
834850
/*
835851
* Machine check event was not enabled. Clear, but
836852
* ignore.
837853
*/
838854
continue;
839855
}
840856

857+
/*
858+
* Kill on action required.
859+
*/
860+
if (severity == MCE_AR_SEVERITY)
861+
kill_it = 1;
862+
841863
if (m.status & MCI_STATUS_MISCV)
842864
m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
843865
if (m.status & MCI_STATUS_ADDRV)
@@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
846868
mce_get_rip(&m, regs);
847869
mce_log(&m);
848870

849-
severity = mce_severity(&m, tolerant, NULL);
850871
if (severity > worst) {
851872
*final = m;
852873
worst = severity;
@@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
879900
* one task, do that. If the user has set the tolerance very
880901
* high, don't try to do anything at all.
881902
*/
882-
if (kill_it && tolerant < 3) {
883-
int user_space = 0;
884-
885-
/*
886-
* If the EIPV bit is set, it means the saved IP is the
887-
* instruction which caused the MCE.
888-
*/
889-
if (m.mcgstatus & MCG_STATUS_EIPV)
890-
user_space = final->ip && (final->cs & 3);
891903

892-
/*
893-
* If we know that the error was in user space, send a
894-
* SIGBUS. Otherwise, panic if tolerance is low.
895-
*
896-
* force_sig() takes an awful lot of locks and has a slight
897-
* risk of deadlocking.
898-
*/
899-
if (user_space) {
900-
force_sig(SIGBUS, current);
901-
} else if (panic_on_oops || tolerant < 2) {
902-
mce_panic("Uncorrected machine check", final, msg);
903-
}
904-
}
904+
if (kill_it && tolerant < 3)
905+
force_sig(SIGBUS, current);
905906

906907
/* notify userspace ASAP */
907908
set_thread_flag(TIF_MCE_NOTIFY);
@@ -1049,6 +1050,9 @@ static int mce_cap_init(void)
10491050
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
10501051
rip_msr = MSR_IA32_MCG_EIP;
10511052

1053+
if (cap & MCG_SER_P)
1054+
mce_ser = 1;
1055+
10521056
return 0;
10531057
}
10541058

0 commit comments

Comments
 (0)