
Commit c93c622

chucklever authored and amschuma-ntap committed
xprtrdma: Disconnect on registration failure
If rpcrdma_register_external() fails during request marshaling, the current RPC request is killed. Instead, this RPC should be retried after reconnecting the transport instance.

The most likely reason for registration failure with FRMR is a failed post_send, which would be due to a remote transport disconnect or memory exhaustion. These issues can be recovered by a retry.

Problems encountered in the marshaling logic itself will not be corrected by trying again, so these should still kill a request.

Now that we've added a clean exit for marshaling errors, take the opportunity to defang some BUG_ON's.

Signed-off-by: Chuck Lever <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
1 parent c977dea commit c93c622
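For context, a small standalone sketch (not part of this patch, user-space C only) of the error disposition the patch establishes: after this change, xprt_rdma_send_request() returns -EIO only for marshaling problems, which kill the RPC, while other failures fall through to drop_connection and return -ENOTCONN so the transport reconnects and the RPC is retransmitted. The dispose() helper and the sample values below are illustrative assumptions, not kernel code.

/*
 * Standalone sketch, not kernel code: models how the RPC layer is
 * expected to treat the results of xprt_rdma_send_request() after
 * this patch.  dispose() and the sample values are illustrative.
 */
#include <errno.h>
#include <stdio.h>

static const char *dispose(int rc)
{
	if (rc == 0)
		return "request sent";
	if (rc == -EIO)
		return "marshaling error: kill this RPC, retry cannot help";
	/* -ENOTCONN (e.g. failed registration): implies disconnect */
	return "disconnect, reconnect, then retransmit this RPC";
}

int main(void)
{
	const int samples[] = { 0, -EIO, -ENOTCONN };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("send_request() -> %d: %s\n",
		       samples[i], dispose(samples[i]));
	return 0;
}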

File tree: 2 files changed, 42 additions & 23 deletions

net/sunrpc/xprtrdma/rpc_rdma.c

Lines changed: 31 additions & 17 deletions
@@ -77,6 +77,8 @@ static const char transfertypes[][12] = {
  * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
  * elements. Segments are then coalesced when registered, if possible
  * within the selected memreg mode.
+ *
+ * Returns positive number of segments converted, or a negative errno.
  */
 
 static int
@@ -103,12 +105,13 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 			/* alloc the pagelist for receiving buffer */
 			ppages[p] = alloc_page(GFP_ATOMIC);
 			if (!ppages[p])
-				return 0;
+				return -ENOMEM;
 		}
 		seg[n].mr_page = ppages[p];
 		seg[n].mr_offset = (void *)(unsigned long) page_base;
 		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
-		BUG_ON(seg[n].mr_len > PAGE_SIZE);
+		if (seg[n].mr_len > PAGE_SIZE)
+			return -EIO;
 		len -= seg[n].mr_len;
 		++n;
 		++p;
@@ -117,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 
 	/* Message overflows the seg array */
 	if (len && n == nsegs)
-		return 0;
+		return -EIO;
 
 	if (xdrbuf->tail[0].iov_len) {
 		/* the rpcrdma protocol allows us to omit any trailing
@@ -126,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 			return n;
 		if (n == nsegs)
 			/* Tail remains, but we're out of segments */
-			return 0;
+			return -EIO;
 		seg[n].mr_page = NULL;
 		seg[n].mr_offset = xdrbuf->tail[0].iov_base;
 		seg[n].mr_len = xdrbuf->tail[0].iov_len;
@@ -167,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
  *  Reply chunk (a counted array):
  *   N elements:
  *    1 - N - HLOO - HLOO - ... - HLOO
+ *
+ * Returns positive RPC/RDMA header size, or negative errno.
  */
 
-static unsigned int
+static ssize_t
 rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 		struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
 {
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
-	int nsegs, nchunks = 0;
+	int n, nsegs, nchunks = 0;
 	unsigned int pos;
 	struct rpcrdma_mr_seg *seg = req->rl_segments;
 	struct rpcrdma_read_chunk *cur_rchunk = NULL;
@@ -201,11 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 		pos = target->head[0].iov_len;
 
 	nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
-	if (nsegs == 0)
-		return 0;
+	if (nsegs < 0)
+		return nsegs;
 
 	do {
-		int n = rpcrdma_register_external(seg, nsegs,
+		n = rpcrdma_register_external(seg, nsegs,
 				cur_wchunk != NULL, r_xprt);
 		if (n <= 0)
 			goto out;
@@ -277,7 +282,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	for (pos = 0; nchunks--;)
 		pos += rpcrdma_deregister_external(
 				&req->rl_segments[pos], r_xprt);
-	return 0;
+	return n;
 }
 
 /*
@@ -359,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
  * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
  * [2] -- optional padding.
  * [3] -- if padded, header only in [1] and data here.
+ *
+ * Returns zero on success, otherwise a negative errno.
  */
 
 int
@@ -368,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	char *base;
-	size_t hdrlen, rpclen, padlen;
+	size_t rpclen, padlen;
+	ssize_t hdrlen;
 	enum rpcrdma_chunktype rtype, wtype;
 	struct rpcrdma_msg *headerp;
 
@@ -439,7 +447,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	/* The following simplification is not true forever */
 	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
 		wtype = rpcrdma_noch;
-	BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
+	if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
+		dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
+			__func__);
+		return -EIO;
+	}
 
 	hdrlen = 28; /*sizeof *headerp;*/
 	padlen = 0;
@@ -464,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 		headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
 		headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
 		hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
-		BUG_ON(wtype != rpcrdma_noch);
-
+		if (wtype != rpcrdma_noch) {
+			dprintk("RPC: %s: invalid chunk list\n",
+				__func__);
+			return -EIO;
+		}
 	} else {
 		headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
 		headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
@@ -500,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 		hdrlen = rpcrdma_create_chunks(rqst,
 					&rqst->rq_rcv_buf, headerp, wtype);
 	}
-
-	if (hdrlen == 0)
-		return -1;
+	if (hdrlen < 0)
+		return hdrlen;
 
 	dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
 		" headerp 0x%p base 0x%p lkey 0x%x\n",

net/sunrpc/xprtrdma/transport.c

Lines changed: 11 additions & 6 deletions
@@ -595,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task)
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	int rc;
 
-	/* marshal the send itself */
-	if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
-		r_xprt->rx_stats.failed_marshal_count++;
-		dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
-			__func__);
-		return -EIO;
+	if (req->rl_niovs == 0) {
+		rc = rpcrdma_marshal_req(rqst);
+		if (rc < 0)
+			goto failed_marshal;
 	}
 
 	if (req->rl_reply == NULL) /* e.g. reconnection */
@@ -625,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task)
 	rqst->rq_bytes_sent = 0;
 	return 0;
 
+failed_marshal:
+	r_xprt->rx_stats.failed_marshal_count++;
+	dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
+		__func__, rc);
+	if (rc == -EIO)
+		return -EIO;
 drop_connection:
 	xprt_disconnect_done(xprt);
 	return -ENOTCONN; /* implies disconnect */
