Skip to content

Commit 561f0ed

Browse files
author
J. Bruce Fields
committed
nfsd4: allow large readdirs
Currently we limit readdir results to a single page. This can result in a performance regression compared to NFSv3 when reading large directories.

Signed-off-by: J. Bruce Fields <[email protected]>
1 parent 32aaa62 commit 561f0ed

File tree

3 files changed

+82
-69
lines changed

3 files changed

+82
-69
lines changed

fs/nfsd/nfs4proc.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
15001500

15011501
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
15021502
{
1503+
u32 maxcount = svc_max_payload(rqstp);
15031504
u32 rlen = op->u.readdir.rd_maxcount;
15041505

1505-
if (rlen > PAGE_SIZE)
1506-
rlen = PAGE_SIZE;
1506+
if (rlen > maxcount)
1507+
rlen = maxcount;
15071508

1508-
return (op_encode_hdr_size + op_encode_verifier_maxsz)
1509-
* sizeof(__be32) + rlen;
1509+
return (op_encode_hdr_size + op_encode_verifier_maxsz +
1510+
XDR_QUADLEN(rlen)) * sizeof(__be32);
15101511
}
15111512

15121513
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)

fs/nfsd/nfs4xdr.c

Lines changed: 75 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
25752575
}
25762576

25772577
static __be32
2578-
nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2579-
const char *name, int namlen, __be32 **p, int buflen)
2578+
nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
2579+
const char *name, int namlen)
25802580
{
25812581
struct svc_export *exp = cd->rd_fhp->fh_export;
25822582
struct dentry *dentry;
@@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
26282628

26292629
}
26302630
out_encode:
2631-
nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry,
2632-
cd->rd_bmval,
2631+
nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
26332632
cd->rd_rqstp, ignore_crossmnt);
26342633
out_put:
26352634
dput(dentry);
@@ -2638,9 +2637,12 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
26382637
}
26392638

26402639
static __be32 *
2641-
nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
2640+
nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
26422641
{
2643-
if (buflen < 6)
2642+
__be32 *p;
2643+
2644+
p = xdr_reserve_space(xdr, 6);
2645+
if (!p)
26442646
return NULL;
26452647
*p++ = htonl(2);
26462648
*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2657,30 +2659,38 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
26572659
{
26582660
struct readdir_cd *ccd = ccdv;
26592661
struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
2660-
int buflen;
2661-
__be32 *p = cd->buffer;
2662-
__be32 *cookiep;
2662+
struct xdr_stream *xdr = cd->xdr;
2663+
int start_offset = xdr->buf->len;
2664+
int cookie_offset;
2665+
int entry_bytes;
26632666
__be32 nfserr = nfserr_toosmall;
2667+
__be64 wire_offset;
2668+
__be32 *p;
26642669

26652670
/* In nfsv4, "." and ".." never make it onto the wire.. */
26662671
if (name && isdotent(name, namlen)) {
26672672
cd->common.err = nfs_ok;
26682673
return 0;
26692674
}
26702675

2671-
if (cd->offset)
2672-
xdr_encode_hyper(cd->offset, (u64) offset);
2676+
if (cd->cookie_offset) {
2677+
wire_offset = cpu_to_be64(offset);
2678+
write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
2679+
&wire_offset, 8);
2680+
}
26732681

2674-
buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
2675-
if (buflen < 0)
2682+
p = xdr_reserve_space(xdr, 4);
2683+
if (!p)
26762684
goto fail;
2677-
26782685
*p++ = xdr_one; /* mark entry present */
2679-
cookiep = p;
2686+
cookie_offset = xdr->buf->len;
2687+
p = xdr_reserve_space(xdr, 3*4 + namlen);
2688+
if (!p)
2689+
goto fail;
26802690
p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
26812691
p = xdr_encode_array(p, name, namlen); /* name length & name */
26822692

2683-
nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
2693+
nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
26842694
switch (nfserr) {
26852695
case nfs_ok:
26862696
break;
@@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
26992709
*/
27002710
if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
27012711
goto fail;
2702-
p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
2712+
p = nfsd4_encode_rdattr_error(xdr, nfserr);
27032713
if (p == NULL) {
27042714
nfserr = nfserr_toosmall;
27052715
goto fail;
27062716
}
27072717
}
2708-
cd->buflen -= (p - cd->buffer);
2709-
cd->buffer = p;
2710-
cd->offset = cookiep;
2718+
nfserr = nfserr_toosmall;
2719+
entry_bytes = xdr->buf->len - start_offset;
2720+
if (entry_bytes > cd->rd_maxcount)
2721+
goto fail;
2722+
cd->rd_maxcount -= entry_bytes;
2723+
cd->cookie_offset = cookie_offset;
27112724
skip_entry:
27122725
cd->common.err = nfs_ok;
27132726
return 0;
27142727
fail:
2728+
xdr_truncate_encode(xdr, start_offset);
27152729
cd->common.err = nfserr;
27162730
return -EINVAL;
27172731
}
@@ -3206,10 +3220,11 @@ static __be32
32063220
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
32073221
{
32083222
int maxcount;
3223+
int bytes_left;
32093224
loff_t offset;
3225+
__be64 wire_offset;
32103226
struct xdr_stream *xdr = &resp->xdr;
32113227
int starting_len = xdr->buf->len;
3212-
__be32 *page, *tailbase;
32133228
__be32 *p;
32143229

32153230
if (nfserr)
@@ -3219,72 +3234,70 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
32193234
if (!p)
32203235
return nfserr_resource;
32213236

3222-
if (resp->xdr.buf->page_len)
3223-
return nfserr_resource;
3224-
if (!*resp->rqstp->rq_next_page)
3225-
return nfserr_resource;
3226-
32273237
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
32283238
WRITE32(0);
32293239
WRITE32(0);
32303240
resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
32313241
- (char *)resp->xdr.buf->head[0].iov_base;
3232-
tailbase = p;
3233-
3234-
maxcount = PAGE_SIZE;
3235-
if (maxcount > readdir->rd_maxcount)
3236-
maxcount = readdir->rd_maxcount;
32373242

32383243
/*
3239-
* Convert from bytes to words, account for the two words already
3240-
* written, make sure to leave two words at the end for the next
3241-
* pointer and eof field.
3244+
* Number of bytes left for directory entries allowing for the
3245+
* final 8 bytes of the readdir and a following failed op:
3246+
*/
3247+
bytes_left = xdr->buf->buflen - xdr->buf->len
3248+
- COMPOUND_ERR_SLACK_SPACE - 8;
3249+
if (bytes_left < 0) {
3250+
nfserr = nfserr_resource;
3251+
goto err_no_verf;
3252+
}
3253+
maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
3254+
/*
3255+
* Note the rfc defines rd_maxcount as the size of the
3256+
* READDIR4resok structure, which includes the verifier above
3257+
* and the 8 bytes encoded at the end of this function:
32423258
*/
3243-
maxcount = (maxcount >> 2) - 4;
3244-
if (maxcount < 0) {
3245-
nfserr = nfserr_toosmall;
3259+
if (maxcount < 16) {
3260+
nfserr = nfserr_toosmall;
32463261
goto err_no_verf;
32473262
}
3263+
maxcount = min_t(int, maxcount-16, bytes_left);
32483264

3249-
page = page_address(*(resp->rqstp->rq_next_page++));
3265+
readdir->xdr = xdr;
3266+
readdir->rd_maxcount = maxcount;
32503267
readdir->common.err = 0;
3251-
readdir->buflen = maxcount;
3252-
readdir->buffer = page;
3253-
readdir->offset = NULL;
3268+
readdir->cookie_offset = 0;
32543269

32553270
offset = readdir->rd_cookie;
32563271
nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
32573272
&offset,
32583273
&readdir->common, nfsd4_encode_dirent);
32593274
if (nfserr == nfs_ok &&
32603275
readdir->common.err == nfserr_toosmall &&
3261-
readdir->buffer == page)
3262-
nfserr = nfserr_toosmall;
3276+
xdr->buf->len == starting_len + 8) {
3277+
/* nothing encoded; which limit did we hit?: */
3278+
if (maxcount - 16 < bytes_left)
3279+
/* It was the fault of rd_maxcount: */
3280+
nfserr = nfserr_toosmall;
3281+
else
3282+
/* We ran out of buffer space: */
3283+
nfserr = nfserr_resource;
3284+
}
32633285
if (nfserr)
32643286
goto err_no_verf;
32653287

3266-
if (readdir->offset)
3267-
xdr_encode_hyper(readdir->offset, offset);
3288+
if (readdir->cookie_offset) {
3289+
wire_offset = cpu_to_be64(offset);
3290+
write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
3291+
&wire_offset, 8);
3292+
}
32683293

3269-
p = readdir->buffer;
3294+
p = xdr_reserve_space(xdr, 8);
3295+
if (!p) {
3296+
WARN_ON_ONCE(1);
3297+
goto err_no_verf;
3298+
}
32703299
*p++ = 0; /* no more entries */
32713300
*p++ = htonl(readdir->common.err == nfserr_eof);
3272-
resp->xdr.buf->page_len = ((char *)p) -
3273-
(char*)page_address(*(resp->rqstp->rq_next_page-1));
3274-
xdr->buf->len += xdr->buf->page_len;
3275-
3276-
xdr->iov = xdr->buf->tail;
3277-
3278-
xdr->page_ptr++;
3279-
xdr->buf->buflen -= PAGE_SIZE;
3280-
xdr->iov = xdr->buf->tail;
3281-
3282-
/* Use rest of head for padding and remaining ops: */
3283-
resp->xdr.buf->tail[0].iov_base = tailbase;
3284-
resp->xdr.buf->tail[0].iov_len = 0;
3285-
resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
3286-
resp->xdr.end = resp->xdr.p +
3287-
(PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
32883301

32893302
return 0;
32903303
err_no_verf:

fs/nfsd/xdr4.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
287287
struct svc_fh * rd_fhp; /* response */
288288

289289
struct readdir_cd common;
290-
__be32 * buffer;
291-
int buflen;
292-
__be32 * offset;
290+
struct xdr_stream *xdr;
291+
int cookie_offset;
293292
};
294293

295294
struct nfsd4_release_lockowner {

0 commit comments

Comments
 (0)