Skip to content

Commit 37b137f

Browse files
stellarhopper authored and djbw committed
nfit, libnvdimm: allow an ARS scrub to be triggered on demand
Normally, an ARS (Address Range Scrub) only happens at boot/initialization time. However, situations can arise where a bus-wide rescan is needed. Notably, after discovering a latent media error we should do a full rescan to figure out which other sectors are bad, and thus potentially avoid triggering an MCE (machine check exception) on them in the future. Also provide a sysfs trigger to start a bus-wide scrub.

Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Vishal Verma <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
1 parent 1851594 commit 37b137f

File tree

4 files changed

+165
-8
lines changed

4 files changed

+165
-8
lines changed

drivers/acpi/nfit.c

Lines changed: 154 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/module.h>
1616
#include <linux/mutex.h>
1717
#include <linux/ndctl.h>
18+
#include <linux/sysfs.h>
1819
#include <linux/delay.h>
1920
#include <linux/list.h>
2021
#include <linux/acpi.h>
@@ -874,14 +875,87 @@ static ssize_t revision_show(struct device *dev,
874875
}
875876
static DEVICE_ATTR_RO(revision);
876877

878+
/*
879+
* This shows the number of full Address Range Scrubs that have been
880+
* completed since driver load time. Userspace can wait on this using
881+
* select/poll etc. A '+' at the end indicates an ARS is in progress
882+
*/
883+
static ssize_t scrub_show(struct device *dev,
884+
struct device_attribute *attr, char *buf)
885+
{
886+
struct nvdimm_bus_descriptor *nd_desc;
887+
ssize_t rc = -ENXIO;
888+
889+
device_lock(dev);
890+
nd_desc = dev_get_drvdata(dev);
891+
if (nd_desc) {
892+
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
893+
894+
rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
895+
(work_busy(&acpi_desc->work)) ? "+\n" : "\n");
896+
}
897+
device_unlock(dev);
898+
return rc;
899+
}
900+
901+
static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
902+
903+
/*
 * scrub_store() - write handler for the 'scrub' sysfs attribute
 *
 * Writing the value 1 requests an on-demand, bus-wide ARS rescan.
 *
 * Returns @size on success, the kstrtol() error when @buf is not a
 * number, -EINVAL for any value other than 1, -ENXIO when the device
 * has no driver data attached (matching scrub_show()), or the error
 * reported by acpi_nfit_ars_rescan().
 */
static ssize_t scrub_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t size)
{
	struct nvdimm_bus_descriptor *nd_desc;
	ssize_t rc;
	long val;

	rc = kstrtol(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	/*
	 * Do not report success when there is nothing to scrub: without
	 * driver data no rescan can be submitted.
	 */
	rc = -ENXIO;
	device_lock(dev);
	nd_desc = dev_get_drvdata(dev);
	if (nd_desc) {
		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);

		rc = acpi_nfit_ars_rescan(acpi_desc);
	}
	device_unlock(dev);
	if (rc)
		return rc;
	return size;
}
928+
static DEVICE_ATTR_RW(scrub);
929+
930+
static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
931+
{
932+
struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
933+
const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
934+
| 1 << ND_CMD_ARS_STATUS;
935+
936+
return (nd_desc->cmd_mask & mask) == mask;
937+
}
938+
939+
static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
940+
{
941+
struct device *dev = container_of(kobj, struct device, kobj);
942+
struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
943+
944+
if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
945+
return 0;
946+
return a->mode;
947+
}
948+
877949
/* NULL-terminated list of bus-level attributes: revision and scrub. */
static struct attribute *acpi_nfit_attributes[] = {
878950
&dev_attr_revision.attr,
951+
&dev_attr_scrub.attr,
879952
NULL,
880953
};
881954

882955
/*
 * Attribute group named "nfit"; nfit_visible() decides per attribute
 * which mode (if any) it is exposed with.
 */
static struct attribute_group acpi_nfit_attribute_group = {
883956
.name = "nfit",
884957
.attrs = acpi_nfit_attributes,
958+
.is_visible = nfit_visible,
885959
};
886960

887961
static const struct attribute_group *acpi_nfit_attribute_groups[] = {
@@ -2054,7 +2128,7 @@ static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
20542128
unsigned int tmo = scrub_timeout;
20552129
int rc;
20562130

2057-
if (nfit_spa->ars_done || !nfit_spa->nd_region)
2131+
if (!nfit_spa->ars_required || !nfit_spa->nd_region)
20582132
return;
20592133

20602134
rc = ars_start(acpi_desc, nfit_spa);
@@ -2143,7 +2217,9 @@ static void acpi_nfit_scrub(struct work_struct *work)
21432217
* firmware initiated scrubs to complete and then we go search for the
21442218
* affected spa regions to mark them scanned. In the second phase we
21452219
* initiate a directed scrub for every range that was not scrubbed in
2146-
* phase 1.
2220+
* phase 1. If we're called for a 'rescan', we harmlessly pass through
2221+
* the first phase, but really only care about running phase 2, where
2222+
* regions can be notified of new poison.
21472223
*/
21482224

21492225
/* process platform firmware initiated scrubs */
@@ -2246,14 +2322,17 @@ static void acpi_nfit_scrub(struct work_struct *work)
22462322
* Flag all the ranges that still need scrubbing, but
22472323
* register them now to make data available.
22482324
*/
2249-
if (nfit_spa->nd_region)
2250-
nfit_spa->ars_done = 1;
2251-
else
2325+
if (!nfit_spa->nd_region) {
2326+
nfit_spa->ars_required = 1;
22522327
acpi_nfit_register_region(acpi_desc, nfit_spa);
2328+
}
22532329
}
22542330

22552331
list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
22562332
acpi_nfit_async_scrub(acpi_desc, nfit_spa);
2333+
acpi_desc->scrub_count++;
2334+
if (acpi_desc->scrub_count_state)
2335+
sysfs_notify_dirent(acpi_desc->scrub_count_state);
22572336
mutex_unlock(&acpi_desc->init_mutex);
22582337
}
22592338

@@ -2291,12 +2370,48 @@ static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
22912370
return 0;
22922371
}
22932372

2373+
static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
2374+
{
2375+
struct device *dev = acpi_desc->dev;
2376+
struct kernfs_node *nfit;
2377+
struct device *bus_dev;
2378+
2379+
if (!ars_supported(acpi_desc->nvdimm_bus))
2380+
return 0;
2381+
2382+
bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2383+
nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
2384+
if (!nfit) {
2385+
dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
2386+
return -ENODEV;
2387+
}
2388+
acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
2389+
sysfs_put(nfit);
2390+
if (!acpi_desc->scrub_count_state) {
2391+
dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
2392+
return -ENODEV;
2393+
}
2394+
2395+
return 0;
2396+
}
2397+
22942398
static void acpi_nfit_destruct(void *data)
22952399
{
22962400
struct acpi_nfit_desc *acpi_desc = data;
2401+
struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
22972402

22982403
acpi_desc->cancel = 1;
2404+
/*
2405+
* Bounce the nvdimm bus lock to make sure any in-flight
2406+
* acpi_nfit_ars_rescan() submissions have had a chance to
2407+
* either submit or see ->cancel set.
2408+
*/
2409+
device_lock(bus_dev);
2410+
device_unlock(bus_dev);
2411+
22992412
flush_workqueue(nfit_wq);
2413+
if (acpi_desc->scrub_count_state)
2414+
sysfs_put(acpi_desc->scrub_count_state);
23002415
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
23012416
acpi_desc->nvdimm_bus = NULL;
23022417
}
@@ -2309,14 +2424,21 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
23092424
int rc;
23102425

23112426
if (!acpi_desc->nvdimm_bus) {
2427+
acpi_nfit_init_dsms(acpi_desc);
2428+
23122429
acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
23132430
&acpi_desc->nd_desc);
23142431
if (!acpi_desc->nvdimm_bus)
23152432
return -ENOMEM;
2433+
23162434
rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
23172435
acpi_desc);
23182436
if (rc)
23192437
return rc;
2438+
2439+
rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
2440+
if (rc)
2441+
return rc;
23202442
}
23212443

23222444
mutex_lock(&acpi_desc->init_mutex);
@@ -2360,8 +2482,6 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
23602482
if (rc)
23612483
goto out_unlock;
23622484

2363-
acpi_nfit_init_dsms(acpi_desc);
2364-
23652485
rc = acpi_nfit_register_dimms(acpi_desc);
23662486
if (rc)
23672487
goto out_unlock;
@@ -2429,6 +2549,33 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
24292549
return 0;
24302550
}
24312551

2552+
static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
2553+
{
2554+
struct device *dev = acpi_desc->dev;
2555+
struct nfit_spa *nfit_spa;
2556+
2557+
if (work_busy(&acpi_desc->work))
2558+
return -EBUSY;
2559+
2560+
if (acpi_desc->cancel)
2561+
return 0;
2562+
2563+
mutex_lock(&acpi_desc->init_mutex);
2564+
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2565+
struct acpi_nfit_system_address *spa = nfit_spa->spa;
2566+
2567+
if (nfit_spa_type(spa) != NFIT_SPA_PM)
2568+
continue;
2569+
2570+
nfit_spa->ars_required = 1;
2571+
}
2572+
queue_work(nfit_wq, &acpi_desc->work);
2573+
dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
2574+
mutex_unlock(&acpi_desc->init_mutex);
2575+
2576+
return 0;
2577+
}
2578+
24322579
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
24332580
{
24342581
struct nvdimm_bus_descriptor *nd_desc;

drivers/acpi/nfit.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ enum {
8080
struct nfit_spa {
8181
struct list_head list;
8282
struct nd_region *nd_region;
83-
unsigned int ars_done:1;
83+
unsigned int ars_required:1;
8484
u32 clear_err_unit;
8585
u32 max_ars;
8686
struct acpi_nfit_system_address spa[0];
@@ -148,6 +148,8 @@ struct acpi_nfit_desc {
148148
struct nd_cmd_ars_status *ars_status;
149149
size_t ars_status_size;
150150
struct work_struct work;
151+
struct kernfs_node *scrub_count_state;
152+
unsigned int scrub_count;
151153
unsigned int cancel:1;
152154
unsigned long dimm_cmd_force_en;
153155
unsigned long bus_cmd_force_en;

drivers/nvdimm/core.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,13 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
201201
}
202202
EXPORT_SYMBOL_GPL(to_nd_desc);
203203

204+
struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
205+
{
206+
/* struct nvdimm_bus definition is private to libnvdimm */
207+
return &nvdimm_bus->dev;
208+
}
209+
EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
210+
204211
static bool is_uuid_sep(char sep)
205212
{
206213
if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')

include/linux/libnvdimm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ struct nvdimm *to_nvdimm(struct device *dev);
137137
struct nd_region *to_nd_region(struct device *dev);
138138
struct nd_blk_region *to_nd_blk_region(struct device *dev);
139139
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
140+
struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
140141
const char *nvdimm_name(struct nvdimm *nvdimm);
141142
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
142143
void *nvdimm_provider_data(struct nvdimm *nvdimm);

0 commit comments

Comments
 (0)