
Commit d9d0521

isilence authored and axboe committed
io_uring: stop SQPOLL submit on creator's death
When the creator of a SQPOLL io_uring dies (i.e. sqo_task), we don't want its internals like ->files and ->mm to be poked by the SQPOLL task; that has never been nice and recently got racy. It can happen when the owner undergoes destruction while the SQPOLL task tries to submit new requests in parallel and so calls io_sq_thread_acquire*().

This patch halts SQPOLL submissions when sqo_task dies by introducing an sqo_dead flag. Once it is set, the SQPOLL task must not do any submission, which is synchronised by uring_lock as well as the new flag.

The tricky part is making sure that disabling always happens: either the ring is discovered by the creator's do_exit() -> cancel path, or, if the final close() happens before that, disabling is done there by the creator. The latter is guaranteed by the fact that for SQPOLL the creator task, and only it, holds exactly one file note, so the note either pins the ring up to do_exit() or is removed by the creator on the final put in flush (see the comments in io_uring_flush() around file->f_count == 2).

One more place that can trigger io_sq_thread_acquire_*() is __io_req_task_submit(). Shoot off requests on sqo_dead there as well, even though strictly we don't need to, because cancellation of sqo_task should wait for the request before going any further.

note 1: io_disable_sqo_submit() does io_ring_set_wakeup_flag(), so the caller will enter the ring to get an error, but it still doesn't guarantee that the flag won't be cleared.

note 2: if the final __userspace__ close happens not from the creator task, the file note will pin the ring until the creator task dies.

Fixes: b1b6b5a ("kernel/io_uring: cancel io_uring before task works")
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
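For context only (this sketch is not part of the patch): a minimal userspace illustration of the situation the change targets. A parent creates a SQPOLL ring and exits; a child that inherited the fd then calls io_uring_enter() and, with sqo_dead set by the creator's exit path, is expected to get EOWNERDEAD instead of the SQPOLL thread racing with the dead creator's ->mm/->files. The raw syscall numbers, IORING_SETUP_SQPOLL, and sq_thread_idle are real interfaces; the fork/sleep choreography and the privilege note are assumptions made for the example.

    /* Illustrative sketch only; SQPOLL typically needs CAP_SYS_ADMIN on kernels of this era. */
    #include <errno.h>
    #include <linux/io_uring.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct io_uring_params p;
            int fd;

            memset(&p, 0, sizeof(p));
            p.flags = IORING_SETUP_SQPOLL;
            p.sq_thread_idle = 100;         /* ms before the SQPOLL thread idles */

            fd = syscall(__NR_io_uring_setup, 8, &p);
            if (fd < 0) {
                    perror("io_uring_setup");
                    return 1;
            }

            if (fork() == 0) {
                    /* child: inherited the ring fd; crude wait for the creator to exit */
                    sleep(1);
                    if (syscall(__NR_io_uring_enter, fd, 0, 0, 0, NULL, 0) < 0)
                            printf("io_uring_enter after creator death: %s\n",
                                   strerror(errno));        /* expect EOWNERDEAD */
                    _exit(0);
            }

            /* parent is sqo_task; exiting runs cancel, which disables SQPOLL submission */
            return 0;
    }

The point is only that submission attempts after the creator's death turn into an explicit error rather than racing with the creator's teardown.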
1 parent 6b5733e commit d9d0521

File tree: 1 file changed (+53, -9 lines)


fs/io_uring.c

Lines changed: 53 additions & 9 deletions
@@ -262,6 +262,7 @@ struct io_ring_ctx {
         unsigned int            drain_next: 1;
         unsigned int            eventfd_async: 1;
         unsigned int            restricted: 1;
+        unsigned int            sqo_dead: 1;
 
         /*
          * Ring buffer of indices into array of io_uring_sqe, which is
@@ -2160,12 +2161,11 @@ static void io_req_task_cancel(struct callback_head *cb)
 static void __io_req_task_submit(struct io_kiocb *req)
 {
         struct io_ring_ctx *ctx = req->ctx;
-        bool fail;
 
-        fail = __io_sq_thread_acquire_mm(ctx) ||
-                __io_sq_thread_acquire_files(ctx);
         mutex_lock(&ctx->uring_lock);
-        if (!fail)
+        if (!ctx->sqo_dead &&
+            !__io_sq_thread_acquire_mm(ctx) &&
+            !__io_sq_thread_acquire_files(ctx))
                 __io_queue_sqe(req, NULL);
         else
                 __io_req_task_cancel(req, -EFAULT);
@@ -6954,7 +6954,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
                 if (!list_empty(&ctx->iopoll_list))
                         io_do_iopoll(ctx, &nr_events, 0);
 
-                if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)))
+                if (to_submit && !ctx->sqo_dead &&
+                    likely(!percpu_ref_is_dying(&ctx->refs)))
                         ret = io_submit_sqes(ctx, to_submit);
                 mutex_unlock(&ctx->uring_lock);
         }
@@ -8712,6 +8713,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
         mutex_lock(&ctx->uring_lock);
         percpu_ref_kill(&ctx->refs);
+
+        if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
+                ctx->sqo_dead = 1;
+
         /* if force is set, the ring is going away. always drop after that */
         ctx->cq_overflow_flushed = 1;
         if (ctx->rings)
@@ -8874,6 +8879,18 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
         }
 }
 
+static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
+{
+        WARN_ON_ONCE(ctx->sqo_task != current);
+
+        mutex_lock(&ctx->uring_lock);
+        ctx->sqo_dead = 1;
+        mutex_unlock(&ctx->uring_lock);
+
+        /* make sure callers enter the ring to get error */
+        io_ring_set_wakeup_flag(ctx);
+}
+
 /*
  * We need to iteratively cancel requests, in case a request has dependent
  * hard links. These persist even for failure of cancelations, hence keep
@@ -8885,6 +8902,8 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
         struct task_struct *task = current;
 
         if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+                /* for SQPOLL only sqo_task has task notes */
+                io_disable_sqo_submit(ctx);
                 task = ctx->sq_data->thread;
                 atomic_inc(&task->io_uring->in_idle);
                 io_sq_thread_park(ctx->sq_data);
@@ -9056,6 +9075,7 @@ void __io_uring_task_cancel(void)
 static int io_uring_flush(struct file *file, void *data)
 {
         struct io_uring_task *tctx = current->io_uring;
+        struct io_ring_ctx *ctx = file->private_data;
 
         if (!tctx)
                 return 0;
@@ -9071,7 +9091,16 @@ static int io_uring_flush(struct file *file, void *data)
         if (atomic_long_read(&file->f_count) != 2)
                 return 0;
 
-        io_uring_del_task_file(file);
+        if (ctx->flags & IORING_SETUP_SQPOLL) {
+                /* there is only one file note, which is owned by sqo_task */
+                WARN_ON_ONCE((ctx->sqo_task == current) ==
+                             !xa_load(&tctx->xa, (unsigned long)file));
+
+                io_disable_sqo_submit(ctx);
+        }
+
+        if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
+                io_uring_del_task_file(file);
         return 0;
 }
@@ -9145,8 +9174,9 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 
 #endif /* !CONFIG_MMU */
 
-static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
+static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 {
+        int ret = 0;
         DEFINE_WAIT(wait);
 
         do {
@@ -9155,13 +9185,20 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 
                 prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
 
+                if (unlikely(ctx->sqo_dead)) {
+                        ret = -EOWNERDEAD;
+                        goto out;
+                }
+
                 if (!io_sqring_full(ctx))
                         break;
 
                 schedule();
         } while (!signal_pending(current));
 
         finish_wait(&ctx->sqo_sq_wait, &wait);
+out:
+        return ret;
 }
 
 static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
@@ -9235,10 +9272,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
         if (ctx->flags & IORING_SETUP_SQPOLL) {
                 io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
+                ret = -EOWNERDEAD;
+                if (unlikely(ctx->sqo_dead))
+                        goto out;
                 if (flags & IORING_ENTER_SQ_WAKEUP)
                         wake_up(&ctx->sq_data->wait);
-                if (flags & IORING_ENTER_SQ_WAIT)
-                        io_sqpoll_wait_sq(ctx);
+                if (flags & IORING_ENTER_SQ_WAIT) {
+                        ret = io_sqpoll_wait_sq(ctx);
+                        if (ret)
+                                goto out;
+                }
                 submitted = to_submit;
         } else if (to_submit) {
                 ret = io_uring_add_task_file(ctx, f.file);
@@ -9665,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
         trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
         return ret;
 err:
+        io_disable_sqo_submit(ctx);
         io_ring_ctx_wait_and_kill(ctx);
         return ret;
 }
