Skip to content

Commit 6d13760

Browse files
committed
Merge tag 'io_uring-6.16-20250614' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe:

 - Fix for a race between SQPOLL exit and fdinfo reading. It's slim and
   I was only able to reproduce this with an artificial delay in the
   kernel. Followup sparse fix as well to unify the access to ->thread.

 - Fix for multiple buffer peeking, avoiding truncation if possible.

 - Run local task_work for IOPOLL reaping when the ring is exiting.
   This currently isn't done due to an assumption that polled IO will
   never need task_work, but a fix on the block side is going to change
   that.

* tag 'io_uring-6.16-20250614' of git://git.kernel.dk/linux:
  io_uring: run local task_work from ring exit IOPOLL reaping
  io_uring/kbuf: don't truncate end buffer for multiple buffer peeks
  io_uring: consistently use rcu semantics with sqpoll thread
  io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()
2 parents 588adb2 + b62e0ef commit 6d13760

File tree

6 files changed

+59
-23
lines changed

6 files changed

+59
-23
lines changed

io_uring/fdinfo.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,18 +141,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
141141

142142
if (ctx->flags & IORING_SETUP_SQPOLL) {
143143
struct io_sq_data *sq = ctx->sq_data;
144+
struct task_struct *tsk;
144145

146+
rcu_read_lock();
147+
tsk = rcu_dereference(sq->thread);
145148
/*
146149
* sq->thread might be NULL if we raced with the sqpoll
147150
* thread termination.
148151
*/
149-
if (sq->thread) {
152+
if (tsk) {
153+
get_task_struct(tsk);
154+
rcu_read_unlock();
155+
getrusage(tsk, RUSAGE_SELF, &sq_usage);
156+
put_task_struct(tsk);
150157
sq_pid = sq->task_pid;
151158
sq_cpu = sq->sq_cpu;
152-
getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
153159
sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
154160
+ sq_usage.ru_stime.tv_usec);
155161
sq_work_time = sq->work_time;
162+
} else {
163+
rcu_read_unlock();
156164
}
157165
}
158166

io_uring/io_uring.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,6 +1523,9 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
15231523
}
15241524
}
15251525
mutex_unlock(&ctx->uring_lock);
1526+
1527+
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
1528+
io_move_task_work_from_local(ctx);
15261529
}
15271530

15281531
static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
@@ -2906,7 +2909,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
29062909
struct task_struct *tsk;
29072910

29082911
io_sq_thread_park(sqd);
2909-
tsk = sqd->thread;
2912+
tsk = sqpoll_task_locked(sqd);
29102913
if (tsk && tsk->io_uring && tsk->io_uring->io_wq)
29112914
io_wq_cancel_cb(tsk->io_uring->io_wq,
29122915
io_cancel_ctx_cb, ctx, true);
@@ -3142,7 +3145,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
31423145
s64 inflight;
31433146
DEFINE_WAIT(wait);
31443147

3145-
WARN_ON_ONCE(sqd && sqd->thread != current);
3148+
WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);
31463149

31473150
if (!current->io_uring)
31483151
return;

io_uring/kbuf.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
270270
/* truncate end piece, if needed, for non partial buffers */
271271
if (len > arg->max_len) {
272272
len = arg->max_len;
273-
if (!(bl->flags & IOBL_INC))
273+
if (!(bl->flags & IOBL_INC)) {
274+
if (iov != arg->iovs)
275+
break;
274276
buf->len = len;
277+
}
275278
}
276279

277280
iov->iov_base = u64_to_user_ptr(buf->addr);

io_uring/register.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
273273
if (ctx->flags & IORING_SETUP_SQPOLL) {
274274
sqd = ctx->sq_data;
275275
if (sqd) {
276+
struct task_struct *tsk;
277+
276278
/*
277279
* Observe the correct sqd->lock -> ctx->uring_lock
278280
* ordering. Fine to drop uring_lock here, we hold
@@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
282284
mutex_unlock(&ctx->uring_lock);
283285
mutex_lock(&sqd->lock);
284286
mutex_lock(&ctx->uring_lock);
285-
if (sqd->thread)
286-
tctx = sqd->thread->io_uring;
287+
tsk = sqpoll_task_locked(sqd);
288+
if (tsk)
289+
tctx = tsk->io_uring;
287290
}
288291
} else {
289292
tctx = current->io_uring;

io_uring/sqpoll.c

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ enum {
3030
void io_sq_thread_unpark(struct io_sq_data *sqd)
3131
__releases(&sqd->lock)
3232
{
33-
WARN_ON_ONCE(sqd->thread == current);
33+
WARN_ON_ONCE(sqpoll_task_locked(sqd) == current);
3434

3535
/*
3636
* Do the dance but not conditional clear_bit() because it'd race with
@@ -46,24 +46,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd)
4646
void io_sq_thread_park(struct io_sq_data *sqd)
4747
__acquires(&sqd->lock)
4848
{
49-
WARN_ON_ONCE(data_race(sqd->thread) == current);
49+
struct task_struct *tsk;
5050

5151
atomic_inc(&sqd->park_pending);
5252
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
5353
mutex_lock(&sqd->lock);
54-
if (sqd->thread)
55-
wake_up_process(sqd->thread);
54+
55+
tsk = sqpoll_task_locked(sqd);
56+
if (tsk) {
57+
WARN_ON_ONCE(tsk == current);
58+
wake_up_process(tsk);
59+
}
5660
}
5761

5862
void io_sq_thread_stop(struct io_sq_data *sqd)
5963
{
60-
WARN_ON_ONCE(sqd->thread == current);
64+
struct task_struct *tsk;
65+
6166
WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
6267

6368
set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
6469
mutex_lock(&sqd->lock);
65-
if (sqd->thread)
66-
wake_up_process(sqd->thread);
70+
tsk = sqpoll_task_locked(sqd);
71+
if (tsk) {
72+
WARN_ON_ONCE(tsk == current);
73+
wake_up_process(tsk);
74+
}
6775
mutex_unlock(&sqd->lock);
6876
wait_for_completion(&sqd->exited);
6977
}
@@ -270,7 +278,8 @@ static int io_sq_thread(void *data)
270278
/* offload context creation failed, just exit */
271279
if (!current->io_uring) {
272280
mutex_lock(&sqd->lock);
273-
sqd->thread = NULL;
281+
rcu_assign_pointer(sqd->thread, NULL);
282+
put_task_struct(current);
274283
mutex_unlock(&sqd->lock);
275284
goto err_out;
276285
}
@@ -379,7 +388,8 @@ static int io_sq_thread(void *data)
379388
io_sq_tw(&retry_list, UINT_MAX);
380389

381390
io_uring_cancel_generic(true, sqd);
382-
sqd->thread = NULL;
391+
rcu_assign_pointer(sqd->thread, NULL);
392+
put_task_struct(current);
383393
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
384394
atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
385395
io_run_task_work();
@@ -484,7 +494,10 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
484494
goto err_sqpoll;
485495
}
486496

487-
sqd->thread = tsk;
497+
mutex_lock(&sqd->lock);
498+
rcu_assign_pointer(sqd->thread, tsk);
499+
mutex_unlock(&sqd->lock);
500+
488501
task_to_put = get_task_struct(tsk);
489502
ret = io_uring_alloc_task_context(tsk, ctx);
490503
wake_up_new_task(tsk);
@@ -495,9 +508,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
495508
ret = -EINVAL;
496509
goto err;
497510
}
498-
499-
if (task_to_put)
500-
put_task_struct(task_to_put);
501511
return 0;
502512
err_sqpoll:
503513
complete(&ctx->sq_data->exited);
@@ -515,10 +525,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx,
515525
int ret = -EINVAL;
516526

517527
if (sqd) {
528+
struct task_struct *tsk;
529+
518530
io_sq_thread_park(sqd);
519531
/* Don't set affinity for a dying thread */
520-
if (sqd->thread)
521-
ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
532+
tsk = sqpoll_task_locked(sqd);
533+
if (tsk)
534+
ret = io_wq_cpu_affinity(tsk->io_uring, mask);
522535
io_sq_thread_unpark(sqd);
523536
}
524537

io_uring/sqpoll.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ struct io_sq_data {
88
/* ctx's that are using this sqd */
99
struct list_head ctx_list;
1010

11-
struct task_struct *thread;
11+
struct task_struct __rcu *thread;
1212
struct wait_queue_head wait;
1313

1414
unsigned sq_thread_idle;
@@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
2929
void io_put_sq_data(struct io_sq_data *sqd);
3030
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
3131
int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
32+
33+
static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
34+
{
35+
return rcu_dereference_protected(sqd->thread,
36+
lockdep_is_held(&sqd->lock));
37+
}

0 commit comments

Comments (0)