Skip to content

Commit eac3ef2

Browse files
author
Christoph Hellwig
committed
nvme-pci: split the initial probe from the rest path
nvme_reset_work is a little fragile as it needs to handle both resetting a live controller and initializing one during probe. Split out the initial probe and open code it in nvme_probe and leave nvme_reset_work to just do the live controller reset. This fixes a recently introduced bug where nvme_dev_disable causes a NULL pointer dereferences in blk_mq_quiesce_tagset because the tagset pointer is not set when the reset state is entered directly from the new state. The separate probe code can skip the reset state and probe directly and fixes this. To make sure the system isn't single threaded on enabling nvme controllers, set the PROBE_PREFER_ASYNCHRONOUS flag in the device_driver structure so that the driver core probes in parallel. Fixes: 98d81f0 ("nvme: use blk_mq_[un]quiesce_tagset") Reported-by: Gerd Bayer <[email protected]> Signed-off-by: Christoph Hellwig <[email protected]> Reviewed-by: Keith Busch <[email protected]> Reviewed-by: Chaitanya Kulkarni <[email protected]> Tested-by Gerd Bayer <[email protected]>
1 parent acb71e5 commit eac3ef2

File tree

1 file changed

+80
-53
lines changed

1 file changed

+80
-53
lines changed

drivers/nvme/host/pci.c

Lines changed: 80 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,15 +2837,7 @@ static void nvme_reset_work(struct work_struct *work)
28372837
result = nvme_pci_enable(dev);
28382838
if (result)
28392839
goto out_unlock;
2840-
2841-
if (!dev->ctrl.admin_q) {
2842-
result = nvme_pci_alloc_admin_tag_set(dev);
2843-
if (result)
2844-
goto out_unlock;
2845-
} else {
2846-
nvme_start_admin_queue(&dev->ctrl);
2847-
}
2848-
2840+
nvme_start_admin_queue(&dev->ctrl);
28492841
mutex_unlock(&dev->shutdown_lock);
28502842

28512843
/*
@@ -2873,37 +2865,23 @@ static void nvme_reset_work(struct work_struct *work)
28732865
if (result)
28742866
goto out;
28752867

2876-
if (dev->ctrl.tagset) {
2877-
/*
2878-
* This is a controller reset and we already have a tagset.
2879-
* Freeze and update the number of I/O queues as thos might have
2880-
* changed. If there are no I/O queues left after this reset,
2881-
* keep the controller around but remove all namespaces.
2882-
*/
2883-
if (dev->online_queues > 1) {
2884-
nvme_start_queues(&dev->ctrl);
2885-
nvme_wait_freeze(&dev->ctrl);
2886-
nvme_pci_update_nr_queues(dev);
2887-
nvme_dbbuf_set(dev);
2888-
nvme_unfreeze(&dev->ctrl);
2889-
} else {
2890-
dev_warn(dev->ctrl.device, "IO queues lost\n");
2891-
nvme_mark_namespaces_dead(&dev->ctrl);
2892-
nvme_start_queues(&dev->ctrl);
2893-
nvme_remove_namespaces(&dev->ctrl);
2894-
nvme_free_tagset(dev);
2895-
}
2868+
/*
2869+
* Freeze and update the number of I/O queues as thos might have
2870+
* changed. If there are no I/O queues left after this reset, keep the
2871+
* controller around but remove all namespaces.
2872+
*/
2873+
if (dev->online_queues > 1) {
2874+
nvme_start_queues(&dev->ctrl);
2875+
nvme_wait_freeze(&dev->ctrl);
2876+
nvme_pci_update_nr_queues(dev);
2877+
nvme_dbbuf_set(dev);
2878+
nvme_unfreeze(&dev->ctrl);
28962879
} else {
2897-
/*
2898-
* First probe. Still allow the controller to show up even if
2899-
* there are no namespaces.
2900-
*/
2901-
if (dev->online_queues > 1) {
2902-
nvme_pci_alloc_tag_set(dev);
2903-
nvme_dbbuf_set(dev);
2904-
} else {
2905-
dev_warn(dev->ctrl.device, "IO queues not created\n");
2906-
}
2880+
dev_warn(dev->ctrl.device, "IO queues lost\n");
2881+
nvme_mark_namespaces_dead(&dev->ctrl);
2882+
nvme_start_queues(&dev->ctrl);
2883+
nvme_remove_namespaces(&dev->ctrl);
2884+
nvme_free_tagset(dev);
29072885
}
29082886

29092887
/*
@@ -3059,15 +3037,6 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
30593037
return 0;
30603038
}
30613039

3062-
static void nvme_async_probe(void *data, async_cookie_t cookie)
3063-
{
3064-
struct nvme_dev *dev = data;
3065-
3066-
flush_work(&dev->ctrl.reset_work);
3067-
flush_work(&dev->ctrl.scan_work);
3068-
nvme_put_ctrl(&dev->ctrl);
3069-
}
3070-
30713040
static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
30723041
const struct pci_device_id *id)
30733042
{
@@ -3159,12 +3128,69 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
31593128
goto out_release_prp_pools;
31603129

31613130
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
3131+
3132+
result = nvme_pci_enable(dev);
3133+
if (result)
3134+
goto out_release_iod_mempool;
3135+
3136+
result = nvme_pci_alloc_admin_tag_set(dev);
3137+
if (result)
3138+
goto out_disable;
3139+
3140+
/*
3141+
* Mark the controller as connecting before sending admin commands to
3142+
* allow the timeout handler to do the right thing.
3143+
*/
3144+
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
3145+
dev_warn(dev->ctrl.device,
3146+
"failed to mark controller CONNECTING\n");
3147+
result = -EBUSY;
3148+
goto out_disable;
3149+
}
3150+
3151+
result = nvme_init_ctrl_finish(&dev->ctrl, false);
3152+
if (result)
3153+
goto out_disable;
3154+
3155+
nvme_dbbuf_dma_alloc(dev);
3156+
3157+
result = nvme_setup_host_mem(dev);
3158+
if (result < 0)
3159+
goto out_disable;
3160+
3161+
result = nvme_setup_io_queues(dev);
3162+
if (result)
3163+
goto out_disable;
3164+
3165+
if (dev->online_queues > 1) {
3166+
nvme_pci_alloc_tag_set(dev);
3167+
nvme_dbbuf_set(dev);
3168+
} else {
3169+
dev_warn(dev->ctrl.device, "IO queues not created\n");
3170+
}
3171+
3172+
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
3173+
dev_warn(dev->ctrl.device,
3174+
"failed to mark controller live state\n");
3175+
result = -ENODEV;
3176+
goto out_disable;
3177+
}
3178+
31623179
pci_set_drvdata(pdev, dev);
31633180

3164-
nvme_reset_ctrl(&dev->ctrl);
3165-
async_schedule(nvme_async_probe, dev);
3181+
nvme_start_ctrl(&dev->ctrl);
3182+
nvme_put_ctrl(&dev->ctrl);
31663183
return 0;
31673184

3185+
out_disable:
3186+
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
3187+
nvme_dev_disable(dev, true);
3188+
nvme_free_host_mem(dev);
3189+
nvme_dev_remove_admin(dev);
3190+
nvme_dbbuf_dma_free(dev);
3191+
nvme_free_queues(dev, 0);
3192+
out_release_iod_mempool:
3193+
mempool_destroy(dev->iod_mempool);
31683194
out_release_prp_pools:
31693195
nvme_release_prp_pools(dev);
31703196
out_dev_unmap:
@@ -3560,11 +3586,12 @@ static struct pci_driver nvme_driver = {
35603586
.probe = nvme_probe,
35613587
.remove = nvme_remove,
35623588
.shutdown = nvme_shutdown,
3563-
#ifdef CONFIG_PM_SLEEP
35643589
.driver = {
3565-
.pm = &nvme_dev_pm_ops,
3566-
},
3590+
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
3591+
#ifdef CONFIG_PM_SLEEP
3592+
.pm = &nvme_dev_pm_ops,
35673593
#endif
3594+
},
35683595
.sriov_configure = pci_sriov_configure_simple,
35693596
.err_handler = &nvme_err_handler,
35703597
};

0 commit comments

Comments
 (0)