Skip to content

Commit c6708d5

Browse files
yghannam authored and KAGA-KOKO committed
x86/MCE: Report only DRAM ECC as memory errors on AMD systems
The MCA_STATUS[ErrorCodeExt] field is very bank-type specific. We currently check whether the ErrorCodeExt value is 0x0 or 0x8 in mce_is_memory_error(), but we don't check the bank number. This means that we could flag non-memory errors as memory errors.

We know that we want to flag DRAM ECC errors as memory errors, so let's do those cases first. We can add more cases later when needed.

Define a wrapper function in mce_amd.c so we can use SMCA enums.

[ bp: Remove brackets around return statements. ]

Signed-off-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
1 parent 11cf887 commit c6708d5

File tree

3 files changed

+14
-3
lines changed

3 files changed

+14
-3
lines changed

arch/x86/include/asm/mce.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,7 @@ struct smca_bank {
376376
extern struct smca_bank smca_banks[MAX_NR_BANKS];
377377

378378
extern const char *smca_get_long_name(enum smca_bank_types t);
379+
extern bool amd_mce_is_memory_error(struct mce *m);
379380

380381
extern int mce_threshold_create_device(unsigned int cpu);
381382
extern int mce_threshold_remove_device(unsigned int cpu);
@@ -384,6 +385,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);
384385

385386
static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
386387
static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
388+
static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
387389

388390
#endif
389391

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -503,10 +503,8 @@ static int mce_usable_address(struct mce *m)
503503
bool mce_is_memory_error(struct mce *m)
504504
{
505505
if (m->cpuvendor == X86_VENDOR_AMD) {
506-
/* ErrCodeExt[20:16] */
507-
u8 xec = (m->status >> 16) & 0x1f;
506+
return amd_mce_is_memory_error(m);
508507

509-
return (xec == 0x0 || xec == 0x8);
510508
} else if (m->cpuvendor == X86_VENDOR_INTEL) {
511509
/*
512510
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes

arch/x86/kernel/cpu/mcheck/mce_amd.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,17 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
754754
}
755755
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
756756

757+
/*
 * Decide whether an AMD machine check record should be reported as a
 * memory error. Per the commit description, only DRAM ECC errors are
 * covered for now, because the meaning of ErrorCodeExt depends on the
 * bank type.
 *
 * @m: MCE record to classify; m->status is always read, m->bank is read
 *     on the non-SMCA path.
 *
 * Returns true when mce_flags.smca is set and the bank type is SMCA_UMC
 * with an extended error code of 0x0, or, when mce_flags.smca is not set,
 * when the error is in bank 4 with an extended error code of 0x8.
 * Returns false in every other case.
 */
bool amd_mce_is_memory_error(struct mce *m)
758+
{
759+
/* ErrCodeExt[20:16]: 5-bit extended error code taken from m->status */
760+
u8 xec = (m->status >> 16) & 0x1f;
761+
762+
/* SMCA path: classify by bank type rather than by bank number */
if (mce_flags.smca)
763+
return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
764+
765+
/* Non-SMCA path: only bank 4 with xec 0x8 counts as a memory error */
return m->bank == 4 && xec == 0x8;
766+
}
767+
757768
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
758769
{
759770
struct mce m;

0 commit comments

Comments
 (0)