
Commit c5f6ce9

Keith Busch authored and axboe committed
nvme: don't schedule multiple resets
The queue_work only fails if the work is pending, but not yet running. If the work is running, the work item would get requeued, triggering a double reset. If the first reset fails for any reason, the second reset triggers:

        WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)

Hitting that schedules controller deletion for a second time, which potentially takes a reference on the device that is being deleted. If the reset occurs at the same time as a hot removal event, this causes a double-free.

This patch has the reset helper function check if the work is busy prior to queueing, and changes all places that schedule resets to use this function. Since most users don't want to sync with that work, the "flush_work" is moved to the only caller that wants to sync.

Signed-off-by: Keith Busch <[email protected]>
Reviewed-by: Sagi Grimberg <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
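For reference, the guarded reset helper and the one caller that still synchronizes look like this after the patch (condensed from the diff below; the comments are added here for explanation and are not part of the patch):

static int nvme_reset(struct nvme_dev *dev)
{
        if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
                return -ENODEV;
        /* work_busy() is nonzero while the reset work is pending or running,
         * so a reset already in flight is never queued a second time. */
        if (work_busy(&dev->reset_work))
                return -ENODEV;
        if (!queue_work(nvme_workq, &dev->reset_work))
                return -EBUSY;
        return 0;
}

/* Only the ctrl-ops reset path waits for the queued reset to finish. */
static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
{
        struct nvme_dev *dev = to_nvme_dev(ctrl);
        int ret = nvme_reset(dev);

        if (!ret)
                flush_work(&dev->reset_work);
        return ret;
}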
1 parent 7065906 commit c5f6ce9


drivers/nvme/host/pci.c

Lines changed: 13 additions & 9 deletions
@@ -892,7 +892,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                          "I/O %d QID %d timeout, reset controller\n",
                          req->tag, nvmeq->qid);
                 nvme_dev_disable(dev, false);
-                queue_work(nvme_workq, &dev->reset_work);
+                nvme_reset(dev);
 
                 /*
                  * Mark the request as handled, since the inline shutdown
@@ -1290,7 +1290,7 @@ static void nvme_watchdog_timer(unsigned long data)
 
         /* Skip controllers under certain specific conditions. */
         if (nvme_should_reset(dev, csts)) {
-                if (queue_work(nvme_workq, &dev->reset_work))
+                if (!nvme_reset(dev))
                         dev_warn(dev->dev,
                                 "Failed status: 0x%x, reset controller.\n",
                                 csts);
@@ -1818,11 +1818,10 @@ static int nvme_reset(struct nvme_dev *dev)
 {
         if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
                 return -ENODEV;
-
+        if (work_busy(&dev->reset_work))
+                return -ENODEV;
         if (!queue_work(nvme_workq, &dev->reset_work))
                 return -EBUSY;
-
-        flush_work(&dev->reset_work);
         return 0;
 }
 
@@ -1846,7 +1845,12 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 
 static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
 {
-        return nvme_reset(to_nvme_dev(ctrl));
+        struct nvme_dev *dev = to_nvme_dev(ctrl);
+        int ret = nvme_reset(dev);
+
+        if (!ret)
+                flush_work(&dev->reset_work);
+        return ret;
 }
 
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
@@ -1940,7 +1944,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
         if (prepare)
                 nvme_dev_disable(dev, false);
         else
-                queue_work(nvme_workq, &dev->reset_work);
+                nvme_reset(dev);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
@@ -2009,7 +2013,7 @@ static int nvme_resume(struct device *dev)
         struct pci_dev *pdev = to_pci_dev(dev);
         struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-        queue_work(nvme_workq, &ndev->reset_work);
+        nvme_reset(ndev);
         return 0;
 }
 #endif
@@ -2048,7 +2052,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 
         dev_info(dev->ctrl.device, "restart after slot reset\n");
         pci_restore_state(pdev);
-        queue_work(nvme_workq, &dev->reset_work);
+        nvme_reset(dev);
         return PCI_ERS_RESULT_RECOVERED;
 }
 