Skip to content

Commit d8f7750

Browse files
committed
nvmet-rdma: Correctly handle RDMA device hot removal
When configuring a device attached listener, we may see device removal events. In this case we return a non-zero return code from the cm event handler, which implicitly destroys the cm_id. It is possible that in the future the user will remove this listener and thereby trigger a second call to rdma_destroy_id on an already destroyed cm_id -> BUG. In addition, when a queue bound (active session) cm_id generates a DEVICE_REMOVAL event, we must guarantee all resources are cleaned up by the time we return from the event handler. Introduce nvmet_rdma_device_removal, which addresses (or at least attempts to address) both scenarios. Signed-off-by: Sagi Grimberg <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]>
1 parent 45862eb commit d8f7750

File tree

1 file changed

+70
-17
lines changed

1 file changed

+70
-17
lines changed

drivers/nvme/target/rdma.c

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
7777
NVMET_RDMA_Q_CONNECTING,
7878
NVMET_RDMA_Q_LIVE,
7979
NVMET_RDMA_Q_DISCONNECTING,
80+
NVMET_RDMA_IN_DEVICE_REMOVAL,
8081
};
8182

8283
struct nvmet_rdma_queue {
@@ -984,7 +985,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
984985
struct nvmet_rdma_device *dev = queue->dev;
985986

986987
nvmet_rdma_free_queue(queue);
987-
rdma_destroy_id(cm_id);
988+
989+
if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
990+
rdma_destroy_id(cm_id);
991+
988992
kref_put(&dev->ref, nvmet_rdma_free_dev);
989993
}
990994

@@ -1233,8 +1237,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
12331237
switch (queue->state) {
12341238
case NVMET_RDMA_Q_CONNECTING:
12351239
case NVMET_RDMA_Q_LIVE:
1236-
disconnect = true;
12371240
queue->state = NVMET_RDMA_Q_DISCONNECTING;
1241+
case NVMET_RDMA_IN_DEVICE_REMOVAL:
1242+
disconnect = true;
12381243
break;
12391244
case NVMET_RDMA_Q_DISCONNECTING:
12401245
break;
@@ -1272,6 +1277,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
12721277
schedule_work(&queue->release_work);
12731278
}
12741279

1280+
/**
1281+
* nvmet_rdma_device_removal() - Handle RDMA device removal
1282+
* @cm_id: rdma cm id the DEVICE_REMOVAL event was delivered for
1283+
* @queue: nvmet rdma queue (cm id qp_context), NULL for a listener cm_id
1284+
*
1285+
* DEVICE_REMOVAL event notifies us that the RDMA device is about
1286+
* to unplug so we should take care of destroying our RDMA resources.
1287+
* This event will be generated for each allocated cm_id.
1288+
*
1289+
* Note that this event can be generated on a normal queue cm_id
1290+
* and/or a device bound listener cm_id (where in this case
1291+
* queue will be NULL and cm_id->context is the nvmet_port).
1292+
*
1293+
* We claim ownership on destroying the cm_id. For queues we move
1294+
* the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port we nullify
1295+
* the priv to prevent double cm_id destruction. Return: 1 so that the cm_id is
1296+
* destroyed implicitly by the callout, or 0 if port->priv was not this cm_id.
1297+
*/
1298+
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
1299+
struct nvmet_rdma_queue *queue)
1300+
{
1301+
unsigned long flags;
1302+
1303+
if (!queue) {
1304+
struct nvmet_port *port = cm_id->context;
1305+
1306+
/*
1307+
* This is a listener cm_id. Make sure that
1308+
* future remove_port won't invoke a double
1309+
* cm_id destroy. Use atomic xchg to make sure
1310+
* we don't compete with remove_port (return 0 if priv is not this cm_id).
1311+
*/
1312+
if (xchg(&port->priv, NULL) != cm_id)
1313+
return 0;
1314+
} else {
1315+
/*
1316+
* This is a queue cm_id. Make sure that
1317+
* release queue will not destroy the cm_id
1318+
* and schedule all ctrl queues removal (the state is only
1319+
* changed if the queue is not disconnecting already).
1320+
*/
1321+
spin_lock_irqsave(&queue->state_lock, flags);
1322+
if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
1323+
queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
1324+
spin_unlock_irqrestore(&queue->state_lock, flags);
1325+
nvmet_rdma_queue_disconnect(queue);
1326+
flush_scheduled_work();
1327+
}
1328+
1329+
/*
1330+
* We need to return 1 so that the core will destroy
1331+
* its own ID. What a great API design..
1332+
*/
1333+
return 1;
1334+
}
1335+
12751336
static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
12761337
struct rdma_cm_event *event)
12771338
{
@@ -1294,20 +1355,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
12941355
break;
12951356
case RDMA_CM_EVENT_ADDR_CHANGE:
12961357
case RDMA_CM_EVENT_DISCONNECTED:
1297-
case RDMA_CM_EVENT_DEVICE_REMOVAL:
12981358
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1299-
/*
1300-
* We can get the device removal callback even for a
1301-
* CM ID that we aren't actually using. In that case
1302-
* the context pointer is NULL, so we shouldn't try
1303-
* to disconnect a non-existing queue. But we also
1304-
* need to return 1 so that the core will destroy
1305-
* it's own ID. What a great API design..
1306-
*/
1307-
if (queue)
1308-
nvmet_rdma_queue_disconnect(queue);
1309-
else
1310-
ret = 1;
1359+
nvmet_rdma_queue_disconnect(queue);
1360+
break;
1361+
case RDMA_CM_EVENT_DEVICE_REMOVAL:
1362+
ret = nvmet_rdma_device_removal(cm_id, queue);
13111363
break;
13121364
case RDMA_CM_EVENT_REJECTED:
13131365
case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1448,10 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
13961448

13971449
static void nvmet_rdma_remove_port(struct nvmet_port *port)
13981450
{
1399-
struct rdma_cm_id *cm_id = port->priv;
1451+
struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
14001452

1401-
rdma_destroy_id(cm_id);
1453+
if (cm_id)
1454+
rdma_destroy_id(cm_id);
14021455
}
14031456

14041457
static struct nvmet_fabrics_ops nvmet_rdma_ops = {

0 commit comments

Comments
 (0)