Skip to content

Commit 6839a6d

Browse files
stellarhopperdjbw
authored andcommitted
nfit: do an ARS scrub on hitting a latent media error
When a latent (unknown to 'badblocks') error is encountered, it will trigger a machine check exception. On a system with machine check recovery, this will only SIGBUS the process(es) which had the bad page mapped (as opposed to a kernel panic on platforms without machine check recovery features). In the former case, we want to trigger a full rescan of that nvdimm bus. This will allow any additional, new errors to be captured in the block devices' badblocks lists, and offending operations on them can be trapped early, avoiding machine checks. This is done by registering a callback function with the x86_mce_decoder_chain and calling the new ars_rescan functionality with the address in the mce notification. Cc: Rafael J. Wysocki <[email protected]> Cc: Tony Luck <[email protected]> Signed-off-by: Vishal Verma <[email protected]> Signed-off-by: Dan Williams <[email protected]>
1 parent bdf9701 commit 6839a6d

File tree

5 files changed

+133
-4
lines changed

5 files changed

+133
-4
lines changed

drivers/acpi/nfit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
obj-$(CONFIG_ACPI_NFIT) := nfit.o
22
nfit-y := core.o
3+
nfit-$(CONFIG_X86_MCE) += mce.o

drivers/acpi/nfit/core.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ module_param(disable_vendor_specific, bool, S_IRUGO);
5151
MODULE_PARM_DESC(disable_vendor_specific,
5252
"Limit commands to the publicly specified set\n");
5353

54+
LIST_HEAD(acpi_descs);
55+
DEFINE_MUTEX(acpi_desc_lock);
56+
5457
static struct workqueue_struct *nfit_wq;
5558

5659
struct nfit_table_prev {
@@ -361,7 +364,7 @@ static const char *spa_type_name(u16 type)
361364
return to_name[type];
362365
}
363366

364-
static int nfit_spa_type(struct acpi_nfit_system_address *spa)
367+
int nfit_spa_type(struct acpi_nfit_system_address *spa)
365368
{
366369
int i;
367370

@@ -898,8 +901,6 @@ static ssize_t scrub_show(struct device *dev,
898901
return rc;
899902
}
900903

901-
static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
902-
903904
static ssize_t scrub_store(struct device *dev,
904905
struct device_attribute *attr, const char *buf, size_t size)
905906
{
@@ -2400,6 +2401,11 @@ static void acpi_nfit_destruct(void *data)
24002401
struct acpi_nfit_desc *acpi_desc = data;
24012402
struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
24022403

2404+
/*
2405+
* Destruct under acpi_desc_lock so that nfit_handle_mce does not
2406+
* race teardown
2407+
*/
2408+
mutex_lock(&acpi_desc_lock);
24032409
acpi_desc->cancel = 1;
24042410
/*
24052411
* Bounce the nvdimm bus lock to make sure any in-flight
@@ -2414,6 +2420,8 @@ static void acpi_nfit_destruct(void *data)
24142420
sysfs_put(acpi_desc->scrub_count_state);
24152421
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
24162422
acpi_desc->nvdimm_bus = NULL;
2423+
list_del(&acpi_desc->list);
2424+
mutex_unlock(&acpi_desc_lock);
24172425
}
24182426

24192427
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
@@ -2439,6 +2447,11 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
24392447
rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
24402448
if (rc)
24412449
return rc;
2450+
2451+
/* register this acpi_desc for mce notifications */
2452+
mutex_lock(&acpi_desc_lock);
2453+
list_add_tail(&acpi_desc->list, &acpi_descs);
2454+
mutex_unlock(&acpi_desc_lock);
24422455
}
24432456

24442457
mutex_lock(&acpi_desc->init_mutex);
@@ -2549,7 +2562,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
25492562
return 0;
25502563
}
25512564

2552-
static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
2565+
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
25532566
{
25542567
struct device *dev = acpi_desc->dev;
25552568
struct nfit_spa *nfit_spa;
@@ -2598,6 +2611,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
25982611
INIT_LIST_HEAD(&acpi_desc->flushes);
25992612
INIT_LIST_HEAD(&acpi_desc->memdevs);
26002613
INIT_LIST_HEAD(&acpi_desc->dimms);
2614+
INIT_LIST_HEAD(&acpi_desc->list);
26012615
mutex_init(&acpi_desc->init_mutex);
26022616
INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
26032617
}
@@ -2750,13 +2764,17 @@ static __init int nfit_init(void)
27502764
if (!nfit_wq)
27512765
return -ENOMEM;
27522766

2767+
nfit_mce_register();
2768+
27532769
return acpi_bus_register_driver(&acpi_nfit_driver);
27542770
}
27552771

27562772
static __exit void nfit_exit(void)
27572773
{
2774+
nfit_mce_unregister();
27582775
acpi_bus_unregister_driver(&acpi_nfit_driver);
27592776
destroy_workqueue(nfit_wq);
2777+
WARN_ON(!list_empty(&acpi_descs));
27602778
}
27612779

27622780
module_init(nfit_init);

drivers/acpi/nfit/mce.c

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* NFIT - Machine Check Handler
3+
*
4+
* Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
5+
*
6+
* This program is free software; you can redistribute it and/or modify
7+
* it under the terms of version 2 of the GNU General Public License as
8+
* published by the Free Software Foundation.
9+
*
10+
* This program is distributed in the hope that it will be useful, but
11+
* WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13+
* General Public License for more details.
14+
*/
15+
#include <linux/notifier.h>
16+
#include <linux/acpi.h>
17+
#include <asm/mce.h>
18+
#include "nfit.h"
19+
20+
static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
21+
void *data)
22+
{
23+
struct mce *mce = (struct mce *)data;
24+
struct acpi_nfit_desc *acpi_desc;
25+
struct nfit_spa *nfit_spa;
26+
27+
/* We only care about memory errors */
28+
if (!(mce->status & MCACOD))
29+
return NOTIFY_DONE;
30+
31+
/*
32+
* mce->addr contains the physical addr accessed that caused the
33+
* machine check. We need to walk through the list of NFITs, and see
34+
* if any of them matches that address, and only then start a scrub.
35+
*/
36+
mutex_lock(&acpi_desc_lock);
37+
list_for_each_entry(acpi_desc, &acpi_descs, list) {
38+
struct device *dev = acpi_desc->dev;
39+
int found_match = 0;
40+
41+
mutex_lock(&acpi_desc->init_mutex);
42+
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
43+
struct acpi_nfit_system_address *spa = nfit_spa->spa;
44+
45+
if (nfit_spa_type(spa) == NFIT_SPA_PM)
46+
continue;
47+
/* find the spa that covers the mce addr */
48+
if (spa->address > mce->addr)
49+
continue;
50+
if ((spa->address + spa->length - 1) < mce->addr)
51+
continue;
52+
found_match = 1;
53+
dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
54+
__func__, spa->range_index, spa->address,
55+
spa->length);
56+
/*
57+
* We can break at the first match because we're going
58+
* to rescan all the SPA ranges. There shouldn't be any
59+
* aliasing anyway.
60+
*/
61+
break;
62+
}
63+
mutex_unlock(&acpi_desc->init_mutex);
64+
65+
/*
66+
* We can ignore an -EBUSY here because if an ARS is already
67+
* in progress, just let that be the last authoritative one
68+
*/
69+
if (found_match)
70+
acpi_nfit_ars_rescan(acpi_desc);
71+
}
72+
73+
mutex_unlock(&acpi_desc_lock);
74+
return NOTIFY_DONE;
75+
}
76+
77+
/* Notifier block that routes machine check notifications to nfit_handle_mce() */
static struct notifier_block nfit_mce_dec = {
78+
.notifier_call = nfit_handle_mce,
79+
};
80+
81+
/* Add nfit_mce_dec to the MCE decode chain; called from nfit_init() */
void nfit_mce_register(void)
82+
{
83+
mce_register_decode_chain(&nfit_mce_dec);
84+
}
85+
86+
/* Remove nfit_mce_dec from the MCE decode chain; called from nfit_exit() */
void nfit_mce_unregister(void)
87+
{
88+
mce_unregister_decode_chain(&nfit_mce_dec);
89+
}

drivers/acpi/nfit/nfit.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define __NFIT_H__
1717
#include <linux/workqueue.h>
1818
#include <linux/libnvdimm.h>
19+
#include <linux/ndctl.h>
1920
#include <linux/types.h>
2021
#include <linux/uuid.h>
2122
#include <linux/acpi.h>
@@ -148,6 +149,7 @@ struct acpi_nfit_desc {
148149
struct nd_cmd_ars_status *ars_status;
149150
size_t ars_status_size;
150151
struct work_struct work;
152+
struct list_head list;
151153
struct kernfs_node *scrub_count_state;
152154
unsigned int scrub_count;
153155
unsigned int cancel:1;
@@ -187,6 +189,24 @@ struct nfit_blk {
187189
u32 dimm_flags;
188190
};
189191

192+
extern struct list_head acpi_descs;
193+
extern struct mutex acpi_desc_lock;
194+
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
195+
196+
/*
 * mce.o is only built when CONFIG_X86_MCE is set, so fall back to empty
 * inline stubs for the register/unregister hooks otherwise.
 */
#ifdef CONFIG_X86_MCE
197+
void nfit_mce_register(void);
198+
void nfit_mce_unregister(void);
199+
#else
200+
static inline void nfit_mce_register(void)
201+
{
202+
}
203+
static inline void nfit_mce_unregister(void)
204+
{
205+
}
206+
#endif
207+
208+
int nfit_spa_type(struct acpi_nfit_system_address *spa);
209+
190210
static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
191211
struct nfit_mem *nfit_mem)
192212
{

tools/testing/nvdimm/Kbuild

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ obj-$(CONFIG_DEV_DAX) += dax.o
3030
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
3131

3232
nfit-y := $(ACPI_SRC)/core.o
33+
nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
3334
nfit-y += config_check.o
3435

3536
nd_pmem-y := $(NVDIMM_SRC)/pmem.o

0 commit comments

Comments
 (0)