Skip to content

Commit 3f5a789

Browse files
aegl authored and KAGA-KOKO committed
x86/mce: Include the PPIN in MCE records when available
Intel Xeons from Ivy Bridge onwards support a processor identification number set in the factory. To the user this is a handy unique number to identify a particular CPU. Intel can decode this to the fab/production run to track errors. On systems that have it, include it in the machine check record. I'm told that this would be helpful for users that run large data centers with multi-socket servers to keep track of which CPUs are seeing errors.

Boris:
* Add some clarifying comments and spacing.
* Mask out [63:2] in the disabled-but-not-locked case.
* Call the MSR variable "val" for more readability.

Signed-off-by: Tony Luck <[email protected]>
Cc: Ashok Raj <[email protected]>
Cc: linux-edac <[email protected]>
Cc: x86-ml <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
1 parent f5382de commit 3f5a789

File tree

5 files changed

+47
-0
lines changed

5 files changed

+47
-0
lines changed

arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@
193193
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
194194
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
195195

196+
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
196197
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
197198
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
198199
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */

arch/x86/include/asm/msr-index.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
#define EFER_FFXSR (1<<_EFER_FFXSR)
3838

3939
/* Intel MSRs. Some also available on other CPUs */
40+
41+
#define MSR_PPIN_CTL 0x0000004e
42+
#define MSR_PPIN 0x0000004f
43+
4044
#define MSR_IA32_PERFCTR0 0x000000c1
4145
#define MSR_IA32_PERFCTR1 0x000000c2
4246
#define MSR_FSB_FREQ 0x000000cd

arch/x86/include/uapi/asm/mce.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ struct mce {
2828
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
2929
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
3030
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
31+
__u64 ppin; /* Protected Processor Inventory Number */
3132
};
3233

3334
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <linux/export.h>
4444
#include <linux/jump_label.h>
4545

46+
#include <asm/intel-family.h>
4647
#include <asm/processor.h>
4748
#include <asm/traps.h>
4849
#include <asm/tlbflush.h>
@@ -135,6 +136,9 @@ void mce_setup(struct mce *m)
135136
m->socketid = cpu_data(m->extcpu).phys_proc_id;
136137
m->apicid = cpu_data(m->extcpu).initial_apicid;
137138
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
139+
140+
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
141+
rdmsrl(MSR_PPIN, m->ppin);
138142
}
139143

140144
DEFINE_PER_CPU(struct mce, injectm);

arch/x86/kernel/cpu/mcheck/mce_intel.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <linux/sched.h>
1212
#include <linux/cpumask.h>
1313
#include <asm/apic.h>
14+
#include <asm/cpufeature.h>
15+
#include <asm/intel-family.h>
1416
#include <asm/processor.h>
1517
#include <asm/msr.h>
1618
#include <asm/mce.h>
@@ -464,11 +466,46 @@ static void intel_clear_lmce(void)
464466
wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
465467
}
466468

469+
/*
 * Probe for the PPIN on the server models listed in the switch below and,
 * when MSR_PPIN_CTL ends up with its low two bits equal to 2, set
 * X86_FEATURE_INTEL_PPIN on @c.
 *
 * Judging by the comments in the body, bit 1 of MSR_PPIN_CTL is the enable
 * bit and bit 0 the lock bit (NOTE(review): confirm against the SDM).
 * Models that are not listed, and a non-zero return from the initial
 * rdmsrl_safe(), leave the feature flag untouched.
 */
static void intel_ppin_init(struct cpuinfo_x86 *c)
470+
{
471+
unsigned long long val;
472+
473+
/*
474+
* Even if testing the presence of the MSR would be enough, we don't
475+
* want to risk the situation where other models reuse this MSR for
476+
* other purposes.
477+
*/
478+
switch (c->x86_model) {
479+
case INTEL_FAM6_IVYBRIDGE_X:
480+
case INTEL_FAM6_HASWELL_X:
481+
case INTEL_FAM6_BROADWELL_XEON_D:
482+
case INTEL_FAM6_BROADWELL_X:
483+
case INTEL_FAM6_SKYLAKE_X:
/* Non-zero return from rdmsrl_safe(): give up without touching the flag. */
484+
if (rdmsrl_safe(MSR_PPIN_CTL, &val))
485+
return;
486+
487+
if ((val & 3UL) == 1UL) {
488+
/* PPIN available but disabled: */
489+
return;
490+
}
491+
492+
/* If PPIN is disabled, but not locked, try to enable: */
493+
if (!(val & 3UL)) {
/*
 * The results of both calls are ignored; the re-read value of val
 * alone decides below whether the enable attempt took effect.
 */
494+
wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
495+
rdmsrl_safe(MSR_PPIN_CTL, &val);
496+
}
497+
/* Low two bits == 2 (bit 1 set, bit 0 clear): advertise the feature. */
498+
if ((val & 3UL) == 2UL)
499+
set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
500+
}
501+
}
502+
467503
/*
 * Run the Intel-specific MCE setup steps for @c by calling, in this order,
 * intel_init_thermal(), intel_init_cmci(), intel_init_lmce() and
 * intel_ppin_init(). Only the first and the last of these receive @c.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
468504
{
469505
intel_init_thermal(c);
470506
intel_init_cmci();
471507
intel_init_lmce();
508+
intel_ppin_init(c);
472509
}
473510

474511
void mce_intel_feature_clear(struct cpuinfo_x86 *c)

0 commit comments

Comments
 (0)