Skip to content

Commit c8b920b

Browse files
chucklever authored and amschuma-ntap committed
xprtrdma: Basic support for Remote Invalidation
Have frwr's ro_unmap_sync recognize an invalidated rkey that appears as part of a Receive completion. Local invalidation can be skipped for that rkey.

Use an out-of-band signaling mechanism to indicate to the server that the client is prepared to receive RDMA Send With Invalidate.

Signed-off-by: Chuck Lever <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
1 parent 87cfb9a commit c8b920b

File tree

6 files changed

+44
-7
lines changed

6 files changed

+44
-7
lines changed

net/sunrpc/xprtrdma/fmr_ops.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -273,6 +273,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
273273
*/
274274
list_for_each_entry(mw, &req->rl_registered, mw_list)
275275
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
276+
r_xprt->rx_stats.local_inv_needed++;
276277
rc = ib_unmap_fmr(&unmap_list);
277278
if (rc)
278279
goto out_reset;
@@ -330,4 +331,5 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
330331
.ro_init_mr = fmr_op_init_mr,
331332
.ro_release_mr = fmr_op_release_mr,
332333
.ro_displayname = "fmr",
334+
.ro_send_w_inv_ok = 0,
333335
};

net/sunrpc/xprtrdma/frwr_ops.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
* pending send queue WRs before the transport is reconnected.
6868
*/
6969

70+
#include <linux/sunrpc/rpc_rdma.h>
71+
7072
#include "xprt_rdma.h"
7173

7274
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -471,6 +473,7 @@ static void
471473
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
472474
{
473475
struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
476+
struct rpcrdma_rep *rep = req->rl_reply;
474477
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
475478
struct rpcrdma_mw *mw, *tmp;
476479
struct rpcrdma_frmr *f;
@@ -486,6 +489,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
486489
f = NULL;
487490
invalidate_wrs = pos = prev = NULL;
488491
list_for_each_entry(mw, &req->rl_registered, mw_list) {
492+
if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
493+
(mw->mw_handle == rep->rr_inv_rkey)) {
494+
mw->frmr.fr_state = FRMR_IS_INVALID;
495+
continue;
496+
}
497+
489498
pos = __frwr_prepare_linv_wr(mw);
490499

491500
if (!invalidate_wrs)
@@ -495,6 +504,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
495504
prev = pos;
496505
f = &mw->frmr;
497506
}
507+
if (!f)
508+
goto unmap;
498509

499510
/* Strong send queue ordering guarantees that when the
500511
* last WR in the chain completes, all WRs in the chain
@@ -509,6 +520,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
509520
* replaces the QP. The RPC reply handler won't call us
510521
* unless ri_id->qp is a valid pointer.
511522
*/
523+
r_xprt->rx_stats.local_inv_needed++;
512524
rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
513525
if (rc)
514526
goto reset_mrs;
@@ -575,4 +587,5 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
575587
.ro_init_mr = frwr_op_init_mr,
576588
.ro_release_mr = frwr_op_release_mr,
577589
.ro_displayname = "frwr",
590+
.ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK,
578591
};

net/sunrpc/xprtrdma/rpc_rdma.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,8 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
231231

232232
static int
233233
rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
234-
enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg)
234+
enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
235+
bool reminv_expected)
235236
{
236237
int len, n, p, page_base;
237238
struct page **ppages;
@@ -273,6 +274,13 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
273274
if (type == rpcrdma_readch)
274275
return n;
275276

277+
/* When encoding the Write list, some servers need to see an extra
278+
* segment for odd-length Write chunks. The upper layer provides
279+
* space in the tail iovec for this purpose.
280+
*/
281+
if (type == rpcrdma_writech && reminv_expected)
282+
return n;
283+
276284
if (xdrbuf->tail[0].iov_len) {
277285
/* the rpcrdma protocol allows us to omit any trailing
278286
* xdr pad bytes, saving the server an RDMA operation. */
@@ -329,7 +337,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
329337
if (rtype == rpcrdma_areadch)
330338
pos = 0;
331339
seg = req->rl_segments;
332-
nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg);
340+
nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
333341
if (nsegs < 0)
334342
return ERR_PTR(nsegs);
335343

@@ -393,7 +401,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
393401
seg = req->rl_segments;
394402
nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
395403
rqst->rq_rcv_buf.head[0].iov_len,
396-
wtype, seg);
404+
wtype, seg,
405+
r_xprt->rx_ia.ri_reminv_expected);
397406
if (nsegs < 0)
398407
return ERR_PTR(nsegs);
399408

@@ -458,7 +467,8 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
458467
}
459468

460469
seg = req->rl_segments;
461-
nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg);
470+
nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
471+
r_xprt->rx_ia.ri_reminv_expected);
462472
if (nsegs < 0)
463473
return ERR_PTR(nsegs);
464474

net/sunrpc/xprtrdma/transport.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -730,10 +730,11 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
730730
r_xprt->rx_stats.failed_marshal_count,
731731
r_xprt->rx_stats.bad_reply_count,
732732
r_xprt->rx_stats.nomsg_call_count);
733-
seq_printf(seq, "%lu %lu %lu\n",
733+
seq_printf(seq, "%lu %lu %lu %lu\n",
734734
r_xprt->rx_stats.mrs_recovered,
735735
r_xprt->rx_stats.mrs_orphaned,
736-
r_xprt->rx_stats.mrs_allocated);
736+
r_xprt->rx_stats.mrs_allocated,
737+
r_xprt->rx_stats.local_inv_needed);
737738
}
738739

739740
static int

net/sunrpc/xprtrdma/verbs.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
185185
__func__, rep, wc->byte_len);
186186

187187
rep->rr_len = wc->byte_len;
188+
rep->rr_wc_flags = wc->wc_flags;
189+
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
190+
188191
ib_dma_sync_single_for_cpu(rep->rr_device,
189192
rdmab_addr(rep->rr_rdmabuf),
190193
rep->rr_len, DMA_FROM_DEVICE);
@@ -212,12 +215,15 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
212215
const struct rpcrdma_connect_private *pmsg = param->private_data;
213216
unsigned int rsize, wsize;
214217

218+
/* Default settings for RPC-over-RDMA Version One */
219+
r_xprt->rx_ia.ri_reminv_expected = false;
215220
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
216221
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
217222

218223
if (pmsg &&
219224
pmsg->cp_magic == rpcrdma_cmp_magic &&
220225
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
226+
r_xprt->rx_ia.ri_reminv_expected = true;
221227
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
222228
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
223229
}
@@ -568,7 +574,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
568574
/* Prepare RDMA-CM private message */
569575
pmsg->cp_magic = rpcrdma_cmp_magic;
570576
pmsg->cp_version = RPCRDMA_CMP_VERSION;
571-
pmsg->cp_flags = 0;
577+
pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
572578
pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
573579
pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
574580
ep->rep_remote_cma.private_data = pmsg;

net/sunrpc/xprtrdma/xprt_rdma.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ struct rpcrdma_ia {
7474
unsigned int ri_max_frmr_depth;
7575
unsigned int ri_max_inline_write;
7676
unsigned int ri_max_inline_read;
77+
bool ri_reminv_expected;
7778
struct ib_qp_attr ri_qp_attr;
7879
struct ib_qp_init_attr ri_qp_init_attr;
7980
};
@@ -187,6 +188,8 @@ enum {
187188
struct rpcrdma_rep {
188189
struct ib_cqe rr_cqe;
189190
unsigned int rr_len;
191+
int rr_wc_flags;
192+
u32 rr_inv_rkey;
190193
struct ib_device *rr_device;
191194
struct rpcrdma_xprt *rr_rxprt;
192195
struct work_struct rr_work;
@@ -385,6 +388,7 @@ struct rpcrdma_stats {
385388
unsigned long mrs_recovered;
386389
unsigned long mrs_orphaned;
387390
unsigned long mrs_allocated;
391+
unsigned long local_inv_needed;
388392
};
389393

390394
/*
@@ -408,6 +412,7 @@ struct rpcrdma_memreg_ops {
408412
struct rpcrdma_mw *);
409413
void (*ro_release_mr)(struct rpcrdma_mw *);
410414
const char *ro_displayname;
415+
const int ro_send_w_inv_ok;
411416
};
412417

413418
extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;

0 commit comments

Comments
 (0)