Skip to content

Commit 25d5529

Browse files
chucklever authored and J. Bruce Fields committed
svcrdma: support Remote Invalidation
Support Remote Invalidation. A private message is exchanged with the client upon RDMA transport connect that indicates whether Send With Invalidate may be used by the server to send RPC replies. The invalidate_rkey is arbitrarily chosen from among rkeys present in the RPC-over-RDMA header's chunk lists. Send With Invalidate improves performance only when clients can recognize, while processing an RPC reply, that an rkey has already been invalidated. That has been submitted as a separate change. In the future, the RPC-over-RDMA protocol might support Remote Invalidation properly. The protocol needs to enable signaling between peers to indicate when Remote Invalidation can be used for each individual RPC. Signed-off-by: Chuck Lever <[email protected]> Reviewed-by: Sagi Grimberg <[email protected]> Signed-off-by: J. Bruce Fields <[email protected]>
1 parent cc9d834 commit 25d5529

File tree

3 files changed

+65
-6
lines changed

3 files changed

+65
-6
lines changed

include/linux/sunrpc/svc_rdma.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ struct svcxprt_rdma {
137137
int sc_ord; /* RDMA read limit */
138138
int sc_max_sge;
139139
int sc_max_sge_rd; /* max sge for read target */
140+
bool sc_snd_w_inv; /* OK to use Send With Invalidate */
140141

141142
atomic_t sc_sq_count; /* Number of SQ WR on queue */
142143
unsigned int sc_sq_depth; /* Depth of SQ */

net/sunrpc/xprtrdma/svc_rdma_sendto.c

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
225225
return rp_ary;
226226
}
227227

228+
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
229+
* Responder's choice: requester signals it can handle Send With
230+
* Invalidate, and responder chooses one rkey to invalidate.
231+
*
232+
* Find a candidate rkey to invalidate when sending a reply. Picks the
233+
* first rkey it finds in the chunk lists.
234+
*
235+
* Returns zero if RPC's chunk lists are empty.
236+
*/
237+
static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
238+
struct rpcrdma_write_array *wr_ary,
239+
struct rpcrdma_write_array *rp_ary)
240+
{
241+
struct rpcrdma_read_chunk *rd_ary;
242+
struct rpcrdma_segment *arg_ch;
243+
u32 inv_rkey;
244+
245+
inv_rkey = 0;
246+
247+
rd_ary = svc_rdma_get_read_chunk(rdma_argp);
248+
if (rd_ary) {
249+
inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
250+
goto out;
251+
}
252+
253+
if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
254+
arg_ch = &wr_ary->wc_array[0].wc_target;
255+
inv_rkey = be32_to_cpu(arg_ch->rs_handle);
256+
goto out;
257+
}
258+
259+
if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
260+
arg_ch = &rp_ary->wc_array[0].wc_target;
261+
inv_rkey = be32_to_cpu(arg_ch->rs_handle);
262+
goto out;
263+
}
264+
265+
out:
266+
dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
267+
return inv_rkey;
268+
}
269+
228270
/* Assumptions:
229271
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
230272
*/
@@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
464506
struct page *page,
465507
struct rpcrdma_msg *rdma_resp,
466508
struct svc_rdma_req_map *vec,
467-
int byte_count)
509+
int byte_count,
510+
u32 inv_rkey)
468511
{
469512
struct svc_rdma_op_ctxt *ctxt;
470513
struct ib_send_wr send_wr;
@@ -535,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma,
535578
send_wr.wr_cqe = &ctxt->cqe;
536579
send_wr.sg_list = ctxt->sge;
537580
send_wr.num_sge = sge_no;
538-
send_wr.opcode = IB_WR_SEND;
581+
if (inv_rkey) {
582+
send_wr.opcode = IB_WR_SEND_WITH_INV;
583+
send_wr.ex.invalidate_rkey = inv_rkey;
584+
} else
585+
send_wr.opcode = IB_WR_SEND;
539586
send_wr.send_flags = IB_SEND_SIGNALED;
540587

541588
ret = svc_rdma_send(rdma, &send_wr);
@@ -567,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
567614
int inline_bytes;
568615
struct page *res_page;
569616
struct svc_rdma_req_map *vec;
617+
u32 inv_rkey;
570618

571619
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
572620

@@ -577,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
577625
wr_ary = svc_rdma_get_write_array(rdma_argp);
578626
rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
579627

628+
inv_rkey = 0;
629+
if (rdma->sc_snd_w_inv)
630+
inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
631+
580632
/* Build a req vec for the XDR */
581633
vec = svc_rdma_get_req_map(rdma);
582634
ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
@@ -619,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
619671
goto err1;
620672

621673
ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
622-
inline_bytes);
674+
inline_bytes, inv_rkey);
623675
if (ret < 0)
624676
goto err0;
625677

net/sunrpc/xprtrdma/svc_rdma_transport.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -657,9 +657,14 @@ svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
657657
if (pmsg &&
658658
pmsg->cp_magic == rpcrdma_cmp_magic &&
659659
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
660-
dprintk("svcrdma: client send_size %u, recv_size %u\n",
660+
newxprt->sc_snd_w_inv = pmsg->cp_flags &
661+
RPCRDMA_CMP_F_SND_W_INV_OK;
662+
663+
dprintk("svcrdma: client send_size %u, recv_size %u "
664+
"remote inv %ssupported\n",
661665
rpcrdma_decode_buffer_size(pmsg->cp_send_size),
662-
rpcrdma_decode_buffer_size(pmsg->cp_recv_size));
666+
rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
667+
newxprt->sc_snd_w_inv ? "" : "un");
663668
}
664669
}
665670

@@ -1093,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
10931098
dev->attrs.max_fast_reg_page_list_len;
10941099
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
10951100
newxprt->sc_reader = rdma_read_chunk_frmr;
1096-
}
1101+
} else
1102+
newxprt->sc_snd_w_inv = false;
10971103

10981104
/*
10991105
* Determine if a DMA MR is required and if so, what privs are required

0 commit comments

Comments
 (0)