Skip to content

Commit c947657

Browse files
Israel Rukshin, Christoph Hellwig
authored and committed
nvme-rdma: Fix command completion race at error recovery
The race is between completing the request in the error recovery work and the RDMA completions. If we cancel the request before receiving its successful RDMA completion, we get a NULL dereference of the request MR in nvme_rdma_process_nvme_rsp(). When canceling a request, we return its MR to the MR pool (setting mr to NULL) and also unmap its data. Canceling requests while the RDMA queues are still active is therefore not safe: we can still receive successful RDMA completions that use the mr pointer, which may be NULL. Completing the request too soon may also lead to DMA being performed to/from user buffers that might already have been unmapped. This commit fixes the race by draining the QP before starting the command abort mechanism. Signed-off-by: Israel Rukshin <[email protected]> Reviewed-by: Max Gurtovoy <[email protected]> Signed-off-by: Sagi Grimberg <[email protected]> Signed-off-by: Christoph Hellwig <[email protected]>
1 parent 94e4221 commit c947657

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

drivers/nvme/host/rdma.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -728,7 +728,6 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
728728
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
729729
bool remove)
730730
{
731-
nvme_rdma_stop_queue(&ctrl->queues[0]);
732731
if (remove) {
733732
blk_cleanup_queue(ctrl->ctrl.admin_q);
734733
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
@@ -817,7 +816,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
817816
static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
818817
bool remove)
819818
{
820-
nvme_rdma_stop_io_queues(ctrl);
821819
if (remove) {
822820
blk_cleanup_queue(ctrl->ctrl.connect_q);
823821
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
@@ -947,6 +945,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
947945
return;
948946

949947
destroy_admin:
948+
nvme_rdma_stop_queue(&ctrl->queues[0]);
950949
nvme_rdma_destroy_admin_queue(ctrl, false);
951950
requeue:
952951
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
@@ -963,12 +962,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
963962

964963
if (ctrl->ctrl.queue_count > 1) {
965964
nvme_stop_queues(&ctrl->ctrl);
965+
nvme_rdma_stop_io_queues(ctrl);
966966
blk_mq_tagset_busy_iter(&ctrl->tag_set,
967967
nvme_cancel_request, &ctrl->ctrl);
968968
nvme_rdma_destroy_io_queues(ctrl, false);
969969
}
970970

971971
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
972+
nvme_rdma_stop_queue(&ctrl->queues[0]);
972973
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
973974
nvme_cancel_request, &ctrl->ctrl);
974975
nvme_rdma_destroy_admin_queue(ctrl, false);
@@ -1734,6 +1735,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
17341735
{
17351736
if (ctrl->ctrl.queue_count > 1) {
17361737
nvme_stop_queues(&ctrl->ctrl);
1738+
nvme_rdma_stop_io_queues(ctrl);
17371739
blk_mq_tagset_busy_iter(&ctrl->tag_set,
17381740
nvme_cancel_request, &ctrl->ctrl);
17391741
nvme_rdma_destroy_io_queues(ctrl, shutdown);
@@ -1745,6 +1747,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
17451747
nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
17461748

17471749
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
1750+
nvme_rdma_stop_queue(&ctrl->queues[0]);
17481751
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
17491752
nvme_cancel_request, &ctrl->ctrl);
17501753
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -2011,6 +2014,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
20112014
return &ctrl->ctrl;
20122015

20132016
out_remove_admin_queue:
2017+
nvme_rdma_stop_queue(&ctrl->queues[0]);
20142018
nvme_rdma_destroy_admin_queue(ctrl, true);
20152019
out_uninit_ctrl:
20162020
nvme_uninit_ctrl(&ctrl->ctrl);

0 commit comments

Comments
 (0)