Skip to content

Commit 090bc5a

Browse files
committed
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "Boris is on vacation so I'm sending the RAS bits this time. The main
  changes were:

   - Various RAS/CEC improvements and fixes by Borislav Petkov:
       - error insertion fixes
       - offlining latency fix
       - memory leak fix
       - additional sanity checks
       - cleanups
       - debug output improvements

   - More SMCA enhancements by Yazen Ghannam:
       - make banks truly per-CPU which they are in the hardware
       - don't over-cache certain registers
       - make the number of MCA banks per-CPU variable

     The long term goal with these changes is to support future
     heterogeneous SMCA extensions.

   - Misc fixes and improvements"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Do not check return value of debugfs_create functions
  x86/MCE: Determine MCA banks' init state properly
  x86/MCE: Make the number of MCA banks a per-CPU variable
  x86/MCE/AMD: Don't cache block addresses on SMCA systems
  x86/MCE: Make mce_banks a per-CPU array
  x86/MCE: Make struct mce_banks[] static
  RAS/CEC: Add copyright
  RAS/CEC: Add CONFIG_RAS_CEC_DEBUG and move CEC debug features there
  RAS/CEC: Dump the different array element sections
  RAS/CEC: Rename count_threshold to action_threshold
  RAS/CEC: Sanity-check array on every insertion
  RAS/CEC: Fix potential memory leak
  RAS/CEC: Do not set decay value on error
  RAS/CEC: Check count_threshold unconditionally
  RAS/CEC: Fix pfn insertion
2 parents e192832 + 6e4f929 commit 090bc5a

File tree

7 files changed

+269
-205
lines changed

7 files changed

+269
-205
lines changed

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
9999
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
100100
};
101101

102-
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
103-
{
104-
[0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
105-
};
106-
107102
static const char *smca_get_name(enum smca_bank_types t)
108103
{
109104
if (t >= N_SMCA_BANK_TYPES)
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
197192
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
198193
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
199194

195+
/* Map of banks that have more than MCA_MISC0 available. */
196+
static DEFINE_PER_CPU(u32, smca_misc_banks_map);
197+
200198
static void amd_threshold_interrupt(void);
201199
static void amd_deferred_error_interrupt(void);
202200

@@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void)
206204
}
207205
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
208206

207+
static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
208+
{
209+
u32 low, high;
210+
211+
/*
212+
* For SMCA enabled processors, BLKPTR field of the first MISC register
213+
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
214+
*/
215+
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
216+
return;
217+
218+
if (!(low & MCI_CONFIG_MCAX))
219+
return;
220+
221+
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
222+
return;
223+
224+
if (low & MASK_BLKPTR_LO)
225+
per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
226+
227+
}
228+
209229
static void smca_configure(unsigned int bank, unsigned int cpu)
210230
{
211231
unsigned int i, hwid_mcatype;
@@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
243263
wrmsr(smca_config, low, high);
244264
}
245265

266+
smca_set_misc_banks_map(bank, cpu);
267+
246268
/* Return early if this bank was already initialized. */
247269
if (smca_banks[bank].hwid)
248270
return;
@@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
453475
wrmsr(MSR_CU_DEF_ERR, low, high);
454476
}
455477

456-
static u32 smca_get_block_address(unsigned int bank, unsigned int block)
478+
static u32 smca_get_block_address(unsigned int bank, unsigned int block,
479+
unsigned int cpu)
457480
{
458-
u32 low, high;
459-
u32 addr = 0;
460-
461-
if (smca_get_bank_type(bank) == SMCA_RESERVED)
462-
return addr;
463-
464481
if (!block)
465482
return MSR_AMD64_SMCA_MCx_MISC(bank);
466483

467-
/* Check our cache first: */
468-
if (smca_bank_addrs[bank][block] != -1)
469-
return smca_bank_addrs[bank][block];
470-
471-
/*
472-
* For SMCA enabled processors, BLKPTR field of the first MISC register
473-
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
474-
*/
475-
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
476-
goto out;
477-
478-
if (!(low & MCI_CONFIG_MCAX))
479-
goto out;
480-
481-
if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
482-
(low & MASK_BLKPTR_LO))
483-
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
484+
if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
485+
return 0;
484486

485-
out:
486-
smca_bank_addrs[bank][block] = addr;
487-
return addr;
487+
return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
488488
}
489489

490490
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
491-
unsigned int bank, unsigned int block)
491+
unsigned int bank, unsigned int block,
492+
unsigned int cpu)
492493
{
493494
u32 addr = 0, offset = 0;
494495

495-
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
496+
if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
496497
return addr;
497498

498499
if (mce_flags.smca)
499-
return smca_get_block_address(bank, block);
500+
return smca_get_block_address(bank, block, cpu);
500501

501502
/* Fall back to method we used for older processors: */
502503
switch (block) {
@@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
624625
/* cpu init entry point, called from mce.c with preempt off */
625626
void mce_amd_feature_init(struct cpuinfo_x86 *c)
626627
{
627-
u32 low = 0, high = 0, address = 0;
628628
unsigned int bank, block, cpu = smp_processor_id();
629+
u32 low = 0, high = 0, address = 0;
629630
int offset = -1;
630631

631-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
632+
633+
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
632634
if (mce_flags.smca)
633635
smca_configure(bank, cpu);
634636

635637
disable_err_thresholding(c, bank);
636638

637639
for (block = 0; block < NR_BLOCKS; ++block) {
638-
address = get_block_address(address, low, high, bank, block);
640+
address = get_block_address(address, low, high, bank, block, cpu);
639641
if (!address)
640642
break;
641643

@@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
973975
{
974976
unsigned int bank;
975977

976-
for (bank = 0; bank < mca_cfg.banks; ++bank)
978+
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
977979
log_error_deferred(bank);
978980
}
979981

@@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void)
10141016
struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
10151017
unsigned int bank, cpu = smp_processor_id();
10161018

1017-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
1019+
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
10181020
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
10191021
continue;
10201022

@@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
12011203
u32 low, high;
12021204
int err;
12031205

1204-
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
1206+
if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
12051207
return 0;
12061208

12071209
if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
12521254
if (err)
12531255
goto out_free;
12541256
recurse:
1255-
address = get_block_address(address, low, high, bank, ++block);
1257+
address = get_block_address(address, low, high, bank, ++block, cpu);
12561258
if (!address)
12571259
return 0;
12581260

@@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
14351437
{
14361438
unsigned int bank;
14371439

1438-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
1440+
for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
14391441
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
14401442
continue;
14411443
threshold_remove_bank(cpu, bank);
@@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
14561458
if (bp)
14571459
return 0;
14581460

1459-
bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
1461+
bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
14601462
GFP_KERNEL);
14611463
if (!bp)
14621464
return -ENOMEM;
14631465

14641466
per_cpu(threshold_banks, cpu) = bp;
14651467

1466-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
1468+
for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
14671469
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
14681470
continue;
14691471
err = threshold_create_bank(cpu, bank);

0 commit comments

Comments
 (0)