Commit 28b4afe
Merge tag 'io_uring-5.13-2021-05-07' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
 "Mostly fixes for merge window merged code. In detail:

   - Error case memory leak fixes (Colin, Zqiang)

   - Add the tools/io_uring/ to the list of maintained files (Lukas)

   - Set of fixes for the modified buffer registration API (Pavel)

   - Sanitize io thread setup on x86 (Stefan)

   - Ensure we truncate transfer count for registered buffers (Thadeu)"

* tag 'io_uring-5.13-2021-05-07' of git://git.kernel.dk/linux-block:
  x86/process: setup io_threads more like normal user space threads
  MAINTAINERS: add io_uring tool to IO_URING
  io_uring: truncate lengths larger than MAX_RW_COUNT on provide buffers
  io_uring: Fix memory leak in io_sqe_buffers_register()
  io_uring: Fix premature return from loop and memory leak
  io_uring: fix unchecked error in switch_start()
  io_uring: allow empty slots for reg buffers
  io_uring: add more build check for uapi
  io_uring: dont overlap internal and user req flags
  io_uring: fix drain with rsrc CQEs
2 parents a647034 + 50b7b6f commit 28b4afe
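
One of the fixes pulled in here, "io_uring: truncate lengths larger than MAX_RW_COUNT on provide buffers" (the io_add_buffers hunk in the fs/io_uring.c diff below), clamps each provided buffer's length to MAX_RW_COUNT (INT_MAX & PAGE_MASK). For orientation, this is the user-facing API being hardened, sketched with liburing; the liburing helper names are real, but the setup and shortened error handling are assumptions of this sketch, not part of the commit:

	#include <liburing.h>
	#include <stdlib.h>

	/* Sketch: publish four 4 KiB buffers under buffer group 0, so the
	 * kernel can hand them out to IOSQE_BUFFER_SELECT requests. After
	 * this commit, a per-buffer len larger than MAX_RW_COUNT is
	 * truncated by the kernel instead of being stored in what used to
	 * be a signed (__s32) length field. */
	static int provide_buffers(struct io_uring *ring)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		struct io_uring_cqe *cqe;
		void *mem;

		if (!sqe || posix_memalign(&mem, 4096, 4 * 4096))
			return -1;
		/* args: addr, per-buffer len, nr buffers, group id, first bid */
		io_uring_prep_provide_buffers(sqe, mem, 4096, 4, 0, 0);
		io_uring_submit(ring);
		if (io_uring_wait_cqe(ring, &cqe))
			return -1;
		int res = cqe->res;	/* number of buffers added, or -errno */
		io_uring_cqe_seen(ring, cqe);
		return res;
	}

Paired with the __s32 to __u32 change to io_buffer.len in the same diff, oversized lengths can no longer wrap negative inside the kernel.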

File tree

3 files changed: +70 -19 lines

MAINTAINERS

Lines changed: 1 addition & 0 deletions

@@ -9552,6 +9552,7 @@ F: fs/io-wq.h
 F:	fs/io_uring.c
 F:	include/linux/io_uring.h
 F:	include/uapi/linux/io_uring.h
+F:	tools/io_uring/

 IPMI SUBSYSTEM
 M:	Corey Minyard <[email protected]>

arch/x86/kernel/process.c

Lines changed: 18 additions & 1 deletion

@@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif

 	/* Kernel thread ? */
-	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		kthread_frame_init(frame, sp, arg);
 		return 0;
@@ -172,6 +172,23 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 	task_user_gs(p) = get_user_gs(current_pt_regs());
 #endif

+	if (unlikely(p->flags & PF_IO_WORKER)) {
+		/*
+		 * An IO thread is a user space thread, but it doesn't
+		 * return to ret_after_fork().
+		 *
+		 * In order to indicate that to tools like gdb,
+		 * we reset the stack and instruction pointers.
+		 *
+		 * It does the same kernel frame setup to return to a kernel
+		 * function that a kernel thread does.
+		 */
+		childregs->sp = 0;
+		childregs->ip = 0;
+		kthread_frame_init(frame, sp, arg);
+		return 0;
+	}
+
 	/* Set a new TLS for the child thread? */
 	if (clone_flags & CLONE_SETTLS)
 		ret = set_new_tls(p, tls);
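
What this x86 change means in practice: an io_uring worker is now cloned like a normal user-space thread (own TLS, FPU state), but since it never returns to user mode, its saved stack and instruction pointers are zeroed so tools like gdb can recognize it rather than tripping over stale register state. A rough way to observe such a worker from user space, assuming liburing is available; IOSQE_ASYNC and the liburing helpers are real APIs, while the /proc listing is only illustrative:

	#include <liburing.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	/* Sketch: force a read onto an io-wq worker with IOSQE_ASYNC, then
	 * list this process's threads; an "iou-wrk-*" entry should appear
	 * while the read is parked waiting for stdin input. */
	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		char buf[64], cmd[64];

		if (io_uring_queue_init(8, &ring, 0))
			return 1;
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_read(sqe, STDIN_FILENO, buf, sizeof(buf), 0);
		sqe->flags |= IOSQE_ASYNC;	/* always punt to io-wq */
		io_uring_submit(&ring);

		snprintf(cmd, sizeof(cmd), "ls /proc/%d/task", getpid());
		system(cmd);			/* worker shows up as a thread */

		io_uring_wait_cqe(&ring, &cqe);	/* completes once stdin has data */
		io_uring_cqe_seen(&ring, cqe);
		io_uring_queue_exit(&ring);
		return 0;
	}

Per the comment added in the patch, attaching gdb while such a worker exists should now behave like attaching to any multi-threaded program.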

fs/io_uring.c

Lines changed: 51 additions & 18 deletions

@@ -251,7 +251,7 @@ struct io_rsrc_data {
 struct io_buffer {
 	struct list_head list;
 	__u64 addr;
-	__s32 len;
+	__u32 len;
 	__u16 bid;
 };

@@ -456,6 +456,7 @@ struct io_ring_ctx {
 	spinlock_t		rsrc_ref_lock;
 	struct io_rsrc_node	*rsrc_node;
 	struct io_rsrc_node	*rsrc_backup_node;
+	struct io_mapped_ubuf	*dummy_ubuf;

 	struct io_restriction	restrictions;

@@ -702,7 +703,8 @@ enum {
 	REQ_F_FORCE_ASYNC_BIT	= IOSQE_ASYNC_BIT,
 	REQ_F_BUFFER_SELECT_BIT	= IOSQE_BUFFER_SELECT_BIT,

-	REQ_F_FAIL_LINK_BIT,
+	/* first byte is taken by user flags, shift it to not overlap */
+	REQ_F_FAIL_LINK_BIT	= 8,
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
 	REQ_F_NOWAIT_BIT,

@@ -1157,6 +1159,12 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 	__hash_init(ctx->cancel_hash, 1U << hash_bits);

+	ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL);
+	if (!ctx->dummy_ubuf)
+		goto err;
+	/* set invalid range, so io_import_fixed() fails meeting it */
+	ctx->dummy_ubuf->ubuf = -1UL;
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;

@@ -1184,6 +1192,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->submit_state.comp.locked_free_list);
 	return ctx;
 err:
+	kfree(ctx->dummy_ubuf);
 	kfree(ctx->cancel_hash);
 	kfree(ctx);
 	return NULL;

@@ -3977,7 +3986,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
 			break;

 		buf->addr = addr;
-		buf->len = pbuf->len;
+		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
 		addr += pbuf->len;
 		bid++;

@@ -6503,14 +6512,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	req->work.creds = NULL;

 	/* enforce forwards compatibility on users */
-	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) {
-		req->flags = 0;
+	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
 		return -EINVAL;
-	}
-
 	if (unlikely(req->opcode >= IORING_OP_LAST))
 		return -EINVAL;
-
 	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
 		return -EACCES;

@@ -7539,6 +7544,7 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 			io_ring_submit_lock(ctx, lock_ring);
 			spin_lock_irqsave(&ctx->completion_lock, flags);
 			io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+			ctx->cq_extra++;
 			io_commit_cqring(ctx);
 			spin_unlock_irqrestore(&ctx->completion_lock, flags);
 			io_cqring_ev_posted(ctx);

@@ -8111,11 +8117,13 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot)
 	struct io_mapped_ubuf *imu = *slot;
 	unsigned int i;

-	for (i = 0; i < imu->nr_bvecs; i++)
-		unpin_user_page(imu->bvec[i].bv_page);
-	if (imu->acct_pages)
-		io_unaccount_mem(ctx, imu->acct_pages);
-	kvfree(imu);
+	if (imu != ctx->dummy_ubuf) {
+		for (i = 0; i < imu->nr_bvecs; i++)
+			unpin_user_page(imu->bvec[i].bv_page);
+		if (imu->acct_pages)
+			io_unaccount_mem(ctx, imu->acct_pages);
+		kvfree(imu);
+	}
 	*slot = NULL;
 }

@@ -8132,7 +8140,7 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
 	for (i = 0; i < ctx->nr_user_bufs; i++)
 		io_buffer_unmap(ctx, &ctx->user_bufs[i]);
 	kfree(ctx->user_bufs);
-	kfree(ctx->buf_data);
+	io_rsrc_data_free(ctx->buf_data);
 	ctx->user_bufs = NULL;
 	ctx->buf_data = NULL;
 	ctx->nr_user_bufs = 0;

@@ -8255,6 +8263,11 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	size_t size;
 	int ret, pret, nr_pages, i;

+	if (!iov->iov_base) {
+		*pimu = ctx->dummy_ubuf;
+		return 0;
+	}
+
 	ubuf = (unsigned long) iov->iov_base;
 	end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	start = ubuf >> PAGE_SHIFT;

@@ -8352,7 +8365,9 @@ static int io_buffer_validate(struct iovec *iov)
 	 * constraints here, we'll -EINVAL later when IO is
 	 * submitted if they are wrong.
 	 */
-	if (!iov->iov_base || !iov->iov_len)
+	if (!iov->iov_base)
+		return iov->iov_len ? -EFAULT : 0;
+	if (!iov->iov_len)
 		return -EFAULT;

 	/* arbitrary limit, but we need something */

@@ -8385,7 +8400,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		return -ENOMEM;
 	ret = io_buffers_map_alloc(ctx, nr_args);
 	if (ret) {
-		kfree(data);
+		io_rsrc_data_free(data);
 		return ret;
 	}

@@ -8402,6 +8417,10 @@
 		ret = io_buffer_validate(&iov);
 		if (ret)
 			break;
+		if (!iov.iov_base && tag) {
+			ret = -EINVAL;
+			break;
+		}

 		ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
 					     &last_hpage);

@@ -8451,12 +8470,16 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 		err = io_buffer_validate(&iov);
 		if (err)
 			break;
+		if (!iov.iov_base && tag) {
+			err = -EINVAL;
+			break;
+		}
 		err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
 		if (err)
 			break;

 		i = array_index_nospec(offset, ctx->nr_user_bufs);
-		if (ctx->user_bufs[i]) {
+		if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
 			err = io_queue_rsrc_removal(ctx->buf_data, offset,
 						    ctx->rsrc_node, ctx->user_bufs[i]);
 			if (unlikely(err)) {

@@ -8604,6 +8627,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	if (ctx->hash_map)
 		io_wq_put_hash(ctx->hash_map);
 	kfree(ctx->cancel_hash);
+	kfree(ctx->dummy_ubuf);
 	kfree(ctx);
 }

@@ -9607,7 +9631,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	if (ret)
 		goto err;
 	/* always set a rsrc node */
-	io_rsrc_node_switch_start(ctx);
+	ret = io_rsrc_node_switch_start(ctx);
+	if (ret)
+		goto err;
 	io_rsrc_node_switch(ctx, NULL);

 	memset(&p->sq_off, 0, sizeof(p->sq_off));

@@ -10136,6 +10162,13 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(42, __u16, personality);
 	BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);

+	BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
+		     sizeof(struct io_uring_rsrc_update));
+	BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
+		     sizeof(struct io_uring_rsrc_update2));
+	/* should fit into one byte */
+	BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
+
 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
 	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
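
Most of the fs/io_uring.c changes above implement "allow empty slots for reg buffers": a registered (fixed) buffer table may now contain holes, represented inside the kernel by ctx->dummy_ubuf, whose ubuf = -1UL makes io_import_fixed() reject any transfer against an empty slot. A minimal user-space sketch of the new capability, assuming liburing; note that a NULL buffer paired with a non-zero tag is rejected with -EINVAL, per the checks added in io_sqe_buffers_register() and __io_sqe_buffers_update():

	#include <liburing.h>
	#include <stdlib.h>
	#include <sys/uio.h>

	/* Sketch: register a two-slot fixed-buffer table with a deliberate
	 * hole at index 1. Any READ_FIXED/WRITE_FIXED against slot 1 fails
	 * cleanly rather than oopsing, and the slot can be populated later
	 * through the rsrc-update registration path (not shown). */
	static int register_sparse(struct io_uring *ring)
	{
		struct iovec iovs[2];
		void *mem;

		if (posix_memalign(&mem, 4096, 4096))
			return -1;
		iovs[0].iov_base = mem;
		iovs[0].iov_len  = 4096;	/* real buffer in slot 0 */
		iovs[1].iov_base = NULL;	/* empty slot, legal after this commit */
		iovs[1].iov_len  = 0;		/* NULL base + nonzero len is -EFAULT */

		return io_uring_register_buffers(ring, iovs, 2);
	}

The same holes are what io_buffer_unmap() skips (it must not unpin pages for dummy_ubuf) and what the update path compares against before queueing a removal.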

0 commit comments