Skip to content

Commit 5c635e0

Browse files
tom95858 authored and Trond Myklebust committed
RPCRDMA: Fix FRMR registration/invalidate handling.
When the rpc_memreg_strategy is 5, FRMRs are used to map RPC data. This mode uses an FRMR to map the RPC data, then invalidates (i.e. unregisters) the data in xprt_rdma_free. These FRMRs are used across connections on the same mount, i.e. if the connection goes away on an idle timeout and reconnects later, the FRMRs are not destroyed and recreated. This creates a problem for transport errors because the WR that invalidates an FRMR may be flushed (i.e. fail), leaving the FRMR valid. When the FRMR is later used to map an RPC it will fail, tearing down the transport and starting over. Over time, more and more of the FRMR pool ends up in the wrong state, resulting in seemingly random disconnects. This fix keeps track of the FRMR state explicitly by setting its state based on the successful completion of a reg/inv WR. If the FRMR is ever used and found to be in the wrong state, an invalidate WR is prepended, re-syncing the FRMR state and avoiding the connection loss. Signed-off-by: Tom Tucker <[email protected]> Signed-off-by: Trond Myklebust <[email protected]>
1 parent bd7ea31 commit 5c635e0

File tree

2 files changed

+45
-8
lines changed

2 files changed

+45
-8
lines changed

net/sunrpc/xprtrdma/verbs.c

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
144144
static inline
145145
void rpcrdma_event_process(struct ib_wc *wc)
146146
{
147+
struct rpcrdma_mw *frmr;
147148
struct rpcrdma_rep *rep =
148149
(struct rpcrdma_rep *)(unsigned long) wc->wr_id;
149150

@@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc)
154155
return;
155156

156157
if (IB_WC_SUCCESS != wc->status) {
157-
dprintk("RPC: %s: %s WC status %X, connection lost\n",
158-
__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
159-
wc->status);
158+
dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
159+
__func__, wc->opcode, wc->status);
160160
rep->rr_len = ~0U;
161-
rpcrdma_schedule_tasklet(rep);
161+
if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
162+
rpcrdma_schedule_tasklet(rep);
162163
return;
163164
}
164165

165166
switch (wc->opcode) {
167+
case IB_WC_FAST_REG_MR:
168+
frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
169+
frmr->r.frmr.state = FRMR_IS_VALID;
170+
break;
171+
case IB_WC_LOCAL_INV:
172+
frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
173+
frmr->r.frmr.state = FRMR_IS_INVALID;
174+
break;
166175
case IB_WC_RECV:
167176
rep->rr_len = wc->byte_len;
168177
ib_dma_sync_single_for_cpu(
@@ -1450,6 +1459,11 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
14501459
seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
14511460
seg->mr_offset,
14521461
seg->mr_dmalen, seg->mr_dir);
1462+
if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1463+
dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1464+
__func__,
1465+
seg->mr_dma, seg->mr_offset, seg->mr_dmalen);
1466+
}
14531467
}
14541468

14551469
static void
@@ -1469,7 +1483,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
14691483
struct rpcrdma_xprt *r_xprt)
14701484
{
14711485
struct rpcrdma_mr_seg *seg1 = seg;
1472-
struct ib_send_wr frmr_wr, *bad_wr;
1486+
struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1487+
14731488
u8 key;
14741489
int len, pageoff;
14751490
int i, rc;
@@ -1484,6 +1499,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
14841499
rpcrdma_map_one(ia, seg, writing);
14851500
seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
14861501
len += seg->mr_len;
1502+
BUG_ON(seg->mr_len > PAGE_SIZE);
14871503
++seg;
14881504
++i;
14891505
/* Check for holes */
@@ -1494,26 +1510,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
14941510
dprintk("RPC: %s: Using frmr %p to map %d segments\n",
14951511
__func__, seg1->mr_chunk.rl_mw, i);
14961512

1513+
if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1514+
dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1515+
__func__,
1516+
seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1517+
/* Invalidate before using. */
1518+
memset(&invalidate_wr, 0, sizeof invalidate_wr);
1519+
invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1520+
invalidate_wr.next = &frmr_wr;
1521+
invalidate_wr.opcode = IB_WR_LOCAL_INV;
1522+
invalidate_wr.send_flags = IB_SEND_SIGNALED;
1523+
invalidate_wr.ex.invalidate_rkey =
1524+
seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1525+
DECR_CQCOUNT(&r_xprt->rx_ep);
1526+
post_wr = &invalidate_wr;
1527+
} else
1528+
post_wr = &frmr_wr;
1529+
14971530
/* Bump the key */
14981531
key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
14991532
ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
15001533

15011534
/* Prepare FRMR WR */
15021535
memset(&frmr_wr, 0, sizeof frmr_wr);
1536+
frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
15031537
frmr_wr.opcode = IB_WR_FAST_REG_MR;
1504-
frmr_wr.send_flags = 0; /* unsignaled */
1538+
frmr_wr.send_flags = IB_SEND_SIGNALED;
15051539
frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
15061540
frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
15071541
frmr_wr.wr.fast_reg.page_list_len = i;
15081542
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
15091543
frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1544+
BUG_ON(frmr_wr.wr.fast_reg.length < len);
15101545
frmr_wr.wr.fast_reg.access_flags = (writing ?
15111546
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
15121547
IB_ACCESS_REMOTE_READ);
15131548
frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
15141549
DECR_CQCOUNT(&r_xprt->rx_ep);
15151550

1516-
rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
1551+
rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
15171552

15181553
if (rc) {
15191554
dprintk("RPC: %s: failed ib_post_send for register,"
@@ -1542,8 +1577,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
15421577
rpcrdma_unmap_one(ia, seg++);
15431578

15441579
memset(&invalidate_wr, 0, sizeof invalidate_wr);
1580+
invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
15451581
invalidate_wr.opcode = IB_WR_LOCAL_INV;
1546-
invalidate_wr.send_flags = 0; /* unsignaled */
1582+
invalidate_wr.send_flags = IB_SEND_SIGNALED;
15471583
invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
15481584
DECR_CQCOUNT(&r_xprt->rx_ep);
15491585

net/sunrpc/xprtrdma/xprt_rdma.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
164164
struct {
165165
struct ib_fast_reg_page_list *fr_pgl;
166166
struct ib_mr *fr_mr;
167+
enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
167168
} frmr;
168169
} r;
169170
struct list_head mw_list;

0 commit comments

Comments
 (0)