Commit 227c0c9

io_uring: internally retry short reads
We've had a few application cases of not handling short reads properly,
and it is understandable as short reads aren't really expected if the
application isn't doing non-blocking IO.

Now that we retain the iov_iter over retries, we can implement internal
retry pretty trivially. This ensures that we don't return a short read,
even for buffered reads on page cache conflicts.

Clean up the deep nesting and hard-to-read nature of io_read() as well;
it's much more straightforward now to read and understand. Added a few
comments explaining the logic as well.

Signed-off-by: Jens Axboe <[email protected]>
1 parent: ff6165b
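For context, here is what the change buys userspace. This is a minimal
illustrative sketch using liburing, not part of the commit; the file name
"data.bin", the queue depth, and the 64 KiB length are arbitrary choices.
Before this commit, the buffered read below could legitimately complete
with 0 < cqe->res < READ_LEN on a page cache conflict, leaving the
application to notice and resubmit from the new offset itself; with
internal retry, a short successful completion should only mean EOF.

#include <liburing.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

#define READ_LEN	(64 * 1024)

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char *buf = malloc(READ_LEN);
	int fd = open("data.bin", O_RDONLY);

	if (!buf || fd < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* queue a single buffered read of READ_LEN bytes at offset 0 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, READ_LEN, 0);
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		/*
		 * cqe->res: full READ_LEN on success, short only at EOF,
		 * negative errno on failure
		 */
		printf("read completed with %d of %d bytes\n",
		       cqe->res, READ_LEN);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}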

File tree: 1 file changed (+70 -39 lines)

fs/io_uring.c

Lines changed: 70 additions & 39 deletions
@@ -510,6 +510,7 @@ struct io_async_rw {
 	struct iovec			fast_iov[UIO_FASTIOV];
 	const struct iovec		*free_iovec;
 	struct iov_iter			iter;
+	size_t				bytes_done;
 	struct wait_page_queue		wpq;
 };
 
@@ -916,7 +917,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       bool needs_lock);
 static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
 			     const struct iovec *fast_iov,
-			     struct iov_iter *iter);
+			     struct iov_iter *iter, bool force);
 
 static struct kmem_cache *req_cachep;
 
@@ -2298,7 +2299,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 	ret = io_import_iovec(rw, req, &iovec, &iter, false);
 	if (ret < 0)
 		goto end_req;
-	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter);
+	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
 	if (!ret)
 		return true;
 	kfree(iovec);
@@ -2588,6 +2589,14 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
+	/* add previously done IO, if any */
+	if (req->io && req->io->rw.bytes_done > 0) {
+		if (ret < 0)
+			ret = req->io->rw.bytes_done;
+		else
+			ret += req->io->rw.bytes_done;
+	}
+
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
@@ -2935,6 +2944,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
 
 	memcpy(&rw->iter, iter, sizeof(*iter));
 	rw->free_iovec = NULL;
+	rw->bytes_done = 0;
 	/* can only be fixed buffers, no need to do anything */
 	if (iter->type == ITER_BVEC)
 		return;
@@ -2971,9 +2981,9 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 
 static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
 			     const struct iovec *fast_iov,
-			     struct iov_iter *iter)
+			     struct iov_iter *iter, bool force)
 {
-	if (!io_op_defs[req->opcode].async_ctx)
+	if (!force && !io_op_defs[req->opcode].async_ctx)
 		return 0;
 	if (!req->io) {
 		if (__io_alloc_async_ctx(req))
@@ -3097,8 +3107,7 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
-			       struct iovec *fast_iov, struct iov_iter *iter)
+static bool io_rw_should_retry(struct io_kiocb *req)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
 	int ret;
@@ -3107,8 +3116,8 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (req->flags & REQ_F_NOWAIT)
 		return false;
 
-	/* already tried, or we're doing O_DIRECT */
-	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+	/* Only for buffered IO */
+	if (kiocb->ki_flags & IOCB_DIRECT)
 		return false;
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
@@ -3117,16 +3126,6 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
 		return false;
 
-	/*
-	 * If request type doesn't require req->io to defer in general,
-	 * we need to allocate it here
-	 */
-	if (!req->io) {
-		if (__io_alloc_async_ctx(req))
-			return false;
-		io_req_map_rw(req, iovec, fast_iov, iter);
-	}
-
 	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
 						io_async_buf_func, req);
 	if (!ret) {
@@ -3153,8 +3152,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
 	struct iov_iter __iter, *iter = &__iter;
+	ssize_t io_size, ret, ret2;
 	size_t iov_count;
-	ssize_t io_size, ret, ret2 = 0;
 
 	if (req->io)
 		iter = &req->io->rw.iter;
@@ -3164,6 +3163,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 		return ret;
 	io_size = ret;
 	req->result = io_size;
+	ret = 0;
 
 	/* Ensure we clear previously set non-block flag */
 	if (!force_nonblock)
@@ -3178,31 +3178,62 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 	if (unlikely(ret))
 		goto out_free;
 
-	ret2 = io_iter_do_read(req, iter);
+	ret = io_iter_do_read(req, iter);
 
-	/* Catch -EAGAIN return for forced non-blocking submission */
-	if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
-		kiocb_done(kiocb, ret2, cs);
-	} else {
-copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+	if (!ret) {
+		goto done;
+	} else if (ret == -EIOCBQUEUED) {
+		ret = 0;
+		goto out_free;
+	} else if (ret == -EAGAIN) {
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		if (ret)
 			goto out_free;
-		/* it's copied and will be cleaned with ->io */
-		iovec = NULL;
-		/* if we can retry, do so with the callbacks armed */
-		if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
-			ret2 = io_iter_do_read(req, iter);
-			if (ret2 == -EIOCBQUEUED) {
-				goto out_free;
-			} else if (ret2 != -EAGAIN) {
-				kiocb_done(kiocb, ret2, cs);
-				goto out_free;
-			}
-		}
+		return -EAGAIN;
+	} else if (ret < 0) {
+		goto out_free;
+	}
+
+	/* read it all, or we did blocking attempt. no retry. */
+	if (!iov_iter_count(iter) || !force_nonblock)
+		goto done;
+
+	io_size -= ret;
+copy_iov:
+	ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
+	if (ret2) {
+		ret = ret2;
+		goto out_free;
+	}
+	/* it's copied and will be cleaned with ->io */
+	iovec = NULL;
+	/* now use our persistent iterator, if we aren't already */
+	iter = &req->io->rw.iter;
+retry:
+	req->io->rw.bytes_done += ret;
+	/* if we can retry, do so with the callbacks armed */
+	if (!io_rw_should_retry(req)) {
 		kiocb->ki_flags &= ~IOCB_WAITQ;
 		return -EAGAIN;
 	}
+
+	/*
+	 * Now retry read with the IOCB_WAITQ parts set in the iocb. If we
	 * get -EIOCBQUEUED, then we'll get a notification when the desired
	 * page gets unlocked. We can also get a partial read here, and if we
	 * do, then just retry at the new offset.
+	 */
+	ret = io_iter_do_read(req, iter);
+	if (ret == -EIOCBQUEUED) {
+		ret = 0;
+		goto out_free;
+	} else if (ret > 0 && ret < io_size) {
+		/* we got some bytes, but not all. retry. */
+		goto retry;
+	}
+done:
+	kiocb_done(kiocb, ret, cs);
+	ret = 0;
 out_free:
 	if (iovec)
 		kfree(iovec);
@@ -3295,7 +3326,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 		kiocb_done(kiocb, ret2, cs);
 	} else {
 copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		if (!ret)
 			return -EAGAIN;
 	}
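A note on the kiocb_done() hunk above: since a retry can make partial
progress before failing or re-arming, the completion result must fold in
bytes_done from earlier passes. The standalone userspace model below is
illustrative only (not kernel code; the function name fold_result is
invented) and shows the arithmetic:

#include <assert.h>

/*
 * mirrors the bytes_done accounting added to kiocb_done(): prior
 * partial progress is added to a later success, and an error after
 * partial progress reports the bytes already transferred instead
 */
static long fold_result(long bytes_done, long ret)
{
	if (bytes_done > 0)
		return ret < 0 ? bytes_done : ret + bytes_done;
	return ret;
}

int main(void)
{
	/* 16 KiB on the first pass + 48 KiB on the retry -> 64 KiB total */
	assert(fold_result(16384, 49152) == 65536);
	/* -EAGAIN (-11) on the retry after 16 KiB -> report the 16 KiB */
	assert(fold_result(16384, -11) == 16384);
	/* no prior progress: the result passes through unchanged */
	assert(fold_result(0, -11) == -11);
	return 0;
}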
