Skip to content

Commit bebd031

Browse files
chucklever authored and amschuma-ntap committed
xprtrdma: Support unplugging an HCA from under an NFS mount
The device driver for the underlying physical device associated with an RPC-over-RDMA transport can be removed while RPC-over-RDMA transports are still in use (i.e., while NFS filesystems are still mounted and active). The IB core performs a connection event upcall to request that consumers free all RDMA resources associated with a transport.

There may be pending RPCs when this occurs. Care must be taken to release associated resources without leaving references that can trigger a subsequent crash if a signal or soft timeout occurs. We rely on the caller of the transport's ->close method to ensure that the previous RPC task has invoked xprt_release but the transport remains write-locked.

A DEVICE_REMOVAL upcall forces a disconnect then sleeps. When ->close is invoked, it destroys the transport's H/W resources, then wakes the upcall, which completes and allows the core driver unload to continue.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=266
Signed-off-by: Chuck Lever <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
1 parent 91a10c5 commit bebd031

File tree

3 files changed

+101
-9
lines changed

3 files changed

+101
-9
lines changed

net/sunrpc/xprtrdma/transport.c

Lines changed: 24 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -457,19 +457,33 @@ xprt_setup_rdma(struct xprt_create *args)
457457
return ERR_PTR(rc);
458458
}
459459

460-
/*
461-
* Close a connection, during shutdown or timeout/reconnect
460+
/**
461+
* xprt_rdma_close - Close down RDMA connection
462+
* @xprt: generic transport to be closed
463+
*
464+
* Called during transport shutdown, reconnect, or device
465+
* removal. Caller holds the transport's write lock.
462466
*/
463467
static void
464468
xprt_rdma_close(struct rpc_xprt *xprt)
465469
{
466470
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
471+
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
472+
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
473+
474+
dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
467475

468-
dprintk("RPC: %s: closing\n", __func__);
469-
if (r_xprt->rx_ep.rep_connected > 0)
476+
if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
477+
xprt_clear_connected(xprt);
478+
rpcrdma_ia_remove(ia);
479+
return;
480+
}
481+
if (ep->rep_connected == -ENODEV)
482+
return;
483+
if (ep->rep_connected > 0)
470484
xprt->reestablish_timeout = 0;
471485
xprt_disconnect_done(xprt);
472-
rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
486+
rpcrdma_ep_disconnect(ep, ia);
473487
}
474488

475489
static void
@@ -680,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
680694
* xprt_rdma_send_request - marshal and send an RPC request
681695
* @task: RPC task with an RPC message in rq_snd_buf
682696
*
697+
* Caller holds the transport's write lock.
698+
*
683699
* Return values:
684700
* 0: The request has been sent
685701
* ENOTCONN: Caller needs to invoke connect logic then call again
@@ -706,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
706722
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
707723
int rc = 0;
708724

725+
if (!xprt_connected(xprt))
726+
goto drop_connection;
727+
709728
/* On retransmit, remove any previously registered chunks */
710729
if (unlikely(!list_empty(&req->rl_registered)))
711730
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);

net/sunrpc/xprtrdma/verbs.c

Lines changed: 70 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,8 @@
6969
/*
7070
* internal functions
7171
*/
72+
static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
73+
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
7274

7375
static struct workqueue_struct *rpcrdma_receive_wq;
7476

@@ -262,6 +264,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
262264
__func__, ep);
263265
complete(&ia->ri_done);
264266
break;
267+
case RDMA_CM_EVENT_DEVICE_REMOVAL:
268+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
269+
pr_info("rpcrdma: removing device for %pIS:%u\n",
270+
sap, rpc_get_port(sap));
271+
#endif
272+
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
273+
ep->rep_connected = -ENODEV;
274+
xprt_force_disconnect(&xprt->rx_xprt);
275+
wait_for_completion(&ia->ri_remove_done);
276+
277+
ia->ri_id = NULL;
278+
ia->ri_pd = NULL;
279+
ia->ri_device = NULL;
280+
/* Return 1 to ensure the core destroys the id. */
281+
return 1;
265282
case RDMA_CM_EVENT_ESTABLISHED:
266283
connstate = 1;
267284
ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +308,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
291308
goto connected;
292309
case RDMA_CM_EVENT_DISCONNECTED:
293310
connstate = -ECONNABORTED;
294-
goto connected;
295-
case RDMA_CM_EVENT_DEVICE_REMOVAL:
296-
connstate = -ENODEV;
297311
connected:
298312
dprintk("RPC: %s: %sconnected\n",
299313
__func__, connstate > 0 ? "" : "dis");
@@ -346,6 +360,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
346360
int rc;
347361

348362
init_completion(&ia->ri_done);
363+
init_completion(&ia->ri_remove_done);
349364

350365
id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
351366
IB_QPT_RC);
@@ -468,6 +483,56 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
468483
return rc;
469484
}
470485

486+
/**
487+
* rpcrdma_ia_remove - Handle device driver unload
488+
* @ia: interface adapter being removed
489+
*
490+
* Divest transport H/W resources associated with this adapter,
491+
* but allow it to be restored later.
492+
*/
493+
void
494+
rpcrdma_ia_remove(struct rpcrdma_ia *ia)
495+
{
496+
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
497+
rx_ia);
498+
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
499+
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
500+
struct rpcrdma_req *req;
501+
struct rpcrdma_rep *rep;
502+
503+
cancel_delayed_work_sync(&buf->rb_refresh_worker);
504+
505+
/* This is similar to rpcrdma_ep_destroy, but:
506+
* - Don't cancel the connect worker.
507+
* - Don't call rpcrdma_ep_disconnect, which waits
508+
* for another conn upcall, which will deadlock.
509+
* - rdma_disconnect is unneeded, the underlying
510+
* connection is already gone.
511+
*/
512+
if (ia->ri_id->qp) {
513+
ib_drain_qp(ia->ri_id->qp);
514+
rdma_destroy_qp(ia->ri_id);
515+
ia->ri_id->qp = NULL;
516+
}
517+
ib_free_cq(ep->rep_attr.recv_cq);
518+
ib_free_cq(ep->rep_attr.send_cq);
519+
520+
/* The ULP is responsible for ensuring all DMA
521+
* mappings and MRs are gone.
522+
*/
523+
list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
524+
rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
525+
list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
526+
rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
527+
rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
528+
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
529+
}
530+
rpcrdma_destroy_mrs(buf);
531+
532+
/* Allow waiters to continue */
533+
complete(&ia->ri_remove_done);
534+
}
535+
471536
/**
472537
* rpcrdma_ia_close - Clean up/close an IA.
473538
* @ia: interface adapter to close
@@ -1080,7 +1145,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
10801145

10811146
out_nomws:
10821147
dprintk("RPC: %s: no MWs available\n", __func__);
1083-
schedule_delayed_work(&buf->rb_refresh_worker, 0);
1148+
if (r_xprt->rx_ep.rep_connected != -ENODEV)
1149+
schedule_delayed_work(&buf->rb_refresh_worker, 0);
10841150

10851151
/* Allow the reply handler and refresh worker to run */
10861152
cond_resched();

net/sunrpc/xprtrdma/xprt_rdma.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ struct rpcrdma_ia {
6969
struct rdma_cm_id *ri_id;
7070
struct ib_pd *ri_pd;
7171
struct completion ri_done;
72+
struct completion ri_remove_done;
7273
int ri_async_rc;
7374
unsigned int ri_max_segs;
7475
unsigned int ri_max_frmr_depth;
@@ -78,10 +79,15 @@ struct rpcrdma_ia {
7879
bool ri_reminv_expected;
7980
bool ri_implicit_roundup;
8081
enum ib_mr_type ri_mrtype;
82+
unsigned long ri_flags;
8183
struct ib_qp_attr ri_qp_attr;
8284
struct ib_qp_init_attr ri_qp_init_attr;
8385
};
8486

87+
enum {
88+
RPCRDMA_IAF_REMOVING = 0,
89+
};
90+
8591
/*
8692
* RDMA Endpoint -- one per transport instance
8793
*/
@@ -511,6 +517,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
511517
* Interface Adapter calls - xprtrdma/verbs.c
512518
*/
513519
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
520+
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
514521
void rpcrdma_ia_close(struct rpcrdma_ia *);
515522
bool frwr_is_supported(struct rpcrdma_ia *);
516523
bool fmr_is_supported(struct rpcrdma_ia *);

0 commit comments

Comments
 (0)