
Commit bd7ea31

tom95858 authored and Trond Myklebust committed
RPCRDMA: Fix to XDR page base interpretation in marshalling logic.
The RPCRDMA marshalling logic assumed that xdr->page_base was an offset into the first page of xdr->page_list. It is in fact an offset into the xdr->page_list itself; that is, it selects both the first page in the page_list and the offset into that page.

The symptom depended in part on the rpc_memreg_strategy: with FRMR, or some other one-shot mapping mode, the connection would get torn down on a base-and-bounds error. When the badly marshalled RPC was retransmitted, it would reconnect, hit the error, and tear down the connection again, looping forever and leaving the mount hung. With the other modes, the bug caused silent data corruption. It is most easily reproduced by writing more data than the filesystem has space for.

This fix corrects the page_base assumption and otherwise simplifies the iov mapping logic.

Signed-off-by: Tom Tucker <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
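For illustration only, here is a minimal user-space sketch of the corrected interpretation (not kernel code: the PAGE_* constants and the walk_pages/printf scaffolding are stand-ins). It shows page_base being split into a starting page index plus an intra-page offset, with the offset zeroed after the first page, mirroring the rewritten loop in rpcrdma_convert_iovs():

#include <stdio.h>

/* Stand-ins for the kernel's page constants (normally from <asm/page.h>). */
#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/*
 * Walk a page list the way the fixed code does: the high bits of
 * page_base select the starting page, the low bits give the offset
 * into that page, and the offset is zeroed after the first page.
 * printf() stands in for filling out an rpcrdma_mr_seg.
 */
static void walk_pages(unsigned long page_base, unsigned long page_len)
{
	unsigned long p = page_base >> PAGE_SHIFT;     /* first page index */
	unsigned long offset = page_base & ~PAGE_MASK; /* offset within it */
	unsigned long len = page_len;

	while (len) {
		unsigned long curlen = PAGE_SIZE - offset;

		if (curlen > len)
			curlen = len;
		printf("pages[%lu]: offset %lu, len %lu\n", p, offset, curlen);
		len -= curlen;
		++p;
		offset = 0;	/* offset applies only to the first page */
	}
}

int main(void)
{
	/* page_base 5000 selects pages[1] at offset 904, not pages[0]. */
	walk_pages(5000, 10000);
	return 0;
}

With page_base = 5000 and 4 KiB pages, the walk starts at pages[1], offset 904; the old code instead started at pages[0] with a 5000-byte offset that runs past the end of the page, which is the failure mode described above.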
1 parent b064eca · commit bd7ea31

File tree

1 file changed: +42 −44 lines changed


net/sunrpc/xprtrdma/rpc_rdma.c

Lines changed: 42 additions & 44 deletions
@@ -87,6 +87,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
 {
 	int len, n = 0, p;
+	int page_base;
+	struct page **ppages;
 
 	if (pos == 0 && xdrbuf->head[0].iov_len) {
 		seg[n].mr_page = NULL;
@@ -95,34 +97,32 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 		++n;
 	}
 
-	if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
-		if (n == nsegs)
-			return 0;
-		seg[n].mr_page = xdrbuf->pages[0];
-		seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
-		seg[n].mr_len = min_t(u32,
-			PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
-		len = xdrbuf->page_len - seg[n].mr_len;
+	len = xdrbuf->page_len;
+	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
+	page_base = xdrbuf->page_base & ~PAGE_MASK;
+	p = 0;
+	while (len && n < nsegs) {
+		seg[n].mr_page = ppages[p];
+		seg[n].mr_offset = (void *)(unsigned long) page_base;
+		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
+		BUG_ON(seg[n].mr_len > PAGE_SIZE);
+		len -= seg[n].mr_len;
 		++n;
-		p = 1;
-		while (len > 0) {
-			if (n == nsegs)
-				return 0;
-			seg[n].mr_page = xdrbuf->pages[p];
-			seg[n].mr_offset = NULL;
-			seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
-			len -= seg[n].mr_len;
-			++n;
-			++p;
-		}
+		++p;
+		page_base = 0;	/* page offset only applies to first page */
 	}
 
+	/* Message overflows the seg array */
+	if (len && n == nsegs)
+		return 0;
+
 	if (xdrbuf->tail[0].iov_len) {
 		/* the rpcrdma protocol allows us to omit any trailing
 		 * xdr pad bytes, saving the server an RDMA operation. */
 		if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
 			return n;
 		if (n == nsegs)
+			/* Tail remains, but we're out of segments */
 			return 0;
 		seg[n].mr_page = NULL;
 		seg[n].mr_offset = xdrbuf->tail[0].iov_base;
@@ -296,6 +296,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
 	int copy_len;
 	unsigned char *srcp, *destp;
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+	int page_base;
+	struct page **ppages;
 
 	destp = rqst->rq_svec[0].iov_base;
 	curlen = rqst->rq_svec[0].iov_len;
@@ -324,28 +326,25 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
 			__func__, destp + copy_len, curlen);
 		rqst->rq_svec[0].iov_len += curlen;
 	}
-
 	r_xprt->rx_stats.pullup_copy_count += copy_len;
-	npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
+
+	page_base = rqst->rq_snd_buf.page_base;
+	ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
+	page_base &= ~PAGE_MASK;
+	npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
 	for (i = 0; copy_len && i < npages; i++) {
-		if (i == 0)
-			curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
-		else
-			curlen = PAGE_SIZE;
+		curlen = PAGE_SIZE - page_base;
 		if (curlen > copy_len)
 			curlen = copy_len;
 		dprintk("RPC:       %s: page %d destp 0x%p len %d curlen %d\n",
 			__func__, i, destp, copy_len, curlen);
-		srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
-					KM_SKB_SUNRPC_DATA);
-		if (i == 0)
-			memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
-		else
-			memcpy(destp, srcp, curlen);
+		srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
+		memcpy(destp, srcp+page_base, curlen);
 		kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
 		rqst->rq_svec[0].iov_len += curlen;
 		destp += curlen;
 		copy_len -= curlen;
+		page_base = 0;
 	}
 	/* header now contains entire send message */
 	return pad;
@@ -606,6 +605,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 {
 	int i, npages, curlen, olen;
 	char *destp;
+	struct page **ppages;
+	int page_base;
 
 	curlen = rqst->rq_rcv_buf.head[0].iov_len;
 	if (curlen > copy_len) {	/* write chunk header fixup */
@@ -624,32 +625,29 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 	olen = copy_len;
 	i = 0;
 	rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
+	page_base = rqst->rq_rcv_buf.page_base;
+	ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
+	page_base &= ~PAGE_MASK;
+
 	if (copy_len && rqst->rq_rcv_buf.page_len) {
-		npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
+		npages = PAGE_ALIGN(page_base +
 			rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
 		for (; i < npages; i++) {
-			if (i == 0)
-				curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
-			else
-				curlen = PAGE_SIZE;
+			curlen = PAGE_SIZE - page_base;
 			if (curlen > copy_len)
 				curlen = copy_len;
 			dprintk("RPC:       %s: page %d"
 				" srcp 0x%p len %d curlen %d\n",
 				__func__, i, srcp, copy_len, curlen);
-			destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
-						KM_SKB_SUNRPC_DATA);
-			if (i == 0)
-				memcpy(destp + rqst->rq_rcv_buf.page_base,
-						srcp, curlen);
-			else
-				memcpy(destp, srcp, curlen);
-			flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
+			destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
+			memcpy(destp + page_base, srcp, curlen);
+			flush_dcache_page(ppages[i]);
 			kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
 			srcp += curlen;
 			copy_len -= curlen;
 			if (copy_len == 0)
 				break;
+			page_base = 0;
 		}
 		rqst->rq_rcv_buf.page_len = olen - copy_len;
 	} else
