Commit 5cf4f52

io_uring: free io_buffer_list entries via RCU
mmap_lock nests under uring_lock out of necessity, as we may be doing
user copies with uring_lock held. However, for mmap of provided buffer
rings, we attempt to grab uring_lock with mmap_lock already held from
do_mmap(). This makes lockdep, rightfully, complain:

WARNING: possible circular locking dependency detected
6.7.0-rc1-00009-gff3337ebaf94-dirty #4438 Not tainted
------------------------------------------------------
buf-ring.t/442 is trying to acquire lock:
ffff00020e1480a8 (&ctx->uring_lock){+.+.}-{3:3}, at: io_uring_validate_mmap_request.isra.0+0x4c/0x140

but task is already holding lock:
ffff0000dc226190 (&mm->mmap_lock){++++}-{3:3}, at: vm_mmap_pgoff+0x124/0x264

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&mm->mmap_lock){++++}-{3:3}:
       __might_fault+0x90/0xbc
       io_register_pbuf_ring+0x94/0x488
       __arm64_sys_io_uring_register+0x8dc/0x1318
       invoke_syscall+0x5c/0x17c
       el0_svc_common.constprop.0+0x108/0x130
       do_el0_svc+0x2c/0x38
       el0_svc+0x4c/0x94
       el0t_64_sync_handler+0x118/0x124
       el0t_64_sync+0x168/0x16c

-> #0 (&ctx->uring_lock){+.+.}-{3:3}:
       __lock_acquire+0x19a0/0x2d14
       lock_acquire+0x2e0/0x44c
       __mutex_lock+0x118/0x564
       mutex_lock_nested+0x20/0x28
       io_uring_validate_mmap_request.isra.0+0x4c/0x140
       io_uring_mmu_get_unmapped_area+0x3c/0x98
       get_unmapped_area+0xa4/0x158
       do_mmap+0xec/0x5b4
       vm_mmap_pgoff+0x158/0x264
       ksys_mmap_pgoff+0x1d4/0x254
       __arm64_sys_mmap+0x80/0x9c
       invoke_syscall+0x5c/0x17c
       el0_svc_common.constprop.0+0x108/0x130
       do_el0_svc+0x2c/0x38
       el0_svc+0x4c/0x94
       el0t_64_sync_handler+0x118/0x124
       el0t_64_sync+0x168/0x16c

From that mmap(2) path, we really just need to ensure that the buffer
list doesn't go away from underneath us. The lower indexed entries never
go away until the ring is freed, so we can always sanely reference those
as long as the caller has a file reference. For the higher indexed ones
in our xarray, we just need to ensure that the buffer list remains valid
while we return the address of it.

Free the higher indexed io_buffer_list entries via RCU. With that we can
avoid needing ->uring_lock inside mmap(2), and simply hold the RCU read
lock around the buffer list lookup and address check.

To ensure that the arrayed lookup either returns a valid, fully formed
entry or NULL, add an 'is_ready' flag that we access with store-release
and load-acquire memory ordering. This isn't strictly needed for the
xarray lookups, but it doesn't hurt either; since this isn't a fast path,
retain it across both types. Similarly, for the allocated array inside
the ctx, use the proper load-acquire, as setup could in theory be running
in parallel with mmap.

While in there, add a few lockdep checks for documentation purposes.

Cc: [email protected]
Fixes: c56e022 ("io_uring: add support for user mapped provided buffer ring")
Signed-off-by: Jens Axboe <[email protected]>
1 parent 07d6063 commit 5cf4f52
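Editor's note: the core pattern the commit message describes is that the mmap side only needs the entry to stay valid for the duration of the lookup, which RCU provides once the teardown side defers its free past a grace period. Below is an illustrative sketch of that idea only, not the io_uring code; the demo_* names and the standalone xarray are made up for the example, and the memory behind ring_addr is assumed to be pinned by other means (in io_uring, the file reference).

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/xarray.h>

/* Hypothetical stand-in for io_buffer_list; only the shape matters here. */
struct demo_group {
	void		*ring_addr;
	unsigned int	bgid;
	struct rcu_head	rcu;
};

/*
 * Reader side (what an mmap-style path can do without a mutex): the entry
 * cannot be freed while we are inside the RCU read-side critical section,
 * so looking it up and reading its address is safe. Whatever ring_addr
 * points to must be kept alive by the caller's own reference.
 */
static void *demo_lookup_addr(struct xarray *groups, unsigned int bgid)
{
	struct demo_group *grp;
	void *addr = NULL;

	rcu_read_lock();
	grp = xa_load(groups, bgid);
	if (grp)
		addr = grp->ring_addr;
	rcu_read_unlock();
	return addr;
}

/*
 * Teardown side: unpublish the entry first, then let kfree_rcu() return
 * the kmalloc()'ed memory only after all pre-existing readers are done.
 */
static void demo_remove_group(struct xarray *groups, struct demo_group *grp)
{
	xa_erase(groups, grp->bgid);
	kfree_rcu(grp, rcu);
}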

File tree

io_uring/io_uring.c
io_uring/kbuf.c
io_uring/kbuf.h

3 files changed: +56 -15 lines changed

io_uring/io_uring.c

Lines changed: 2 additions & 2 deletions
@@ -3498,9 +3498,9 @@ static void *io_uring_validate_mmap_request(struct file *file,
 		unsigned int bgid;

 		bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
-		mutex_lock(&ctx->uring_lock);
+		rcu_read_lock();
 		ptr = io_pbuf_get_address(ctx, bgid);
-		mutex_unlock(&ctx->uring_lock);
+		rcu_read_unlock();
 		if (!ptr)
 			return ERR_PTR(-EINVAL);
 		break;

io_uring/kbuf.c

Lines changed: 51 additions & 13 deletions
@@ -40,19 +40,35 @@ struct io_buf_free {
 	int				inuse;
 };

+static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+						   struct io_buffer_list *bl,
+						   unsigned int bgid)
+{
+	if (bl && bgid < BGID_ARRAY)
+		return &bl[bgid];
+
+	return xa_load(&ctx->io_bl_xa, bgid);
+}
+
 static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 							 unsigned int bgid)
 {
-	if (ctx->io_bl && bgid < BGID_ARRAY)
-		return &ctx->io_bl[bgid];
+	lockdep_assert_held(&ctx->uring_lock);

-	return xa_load(&ctx->io_bl_xa, bgid);
+	return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
 }

 static int io_buffer_add_list(struct io_ring_ctx *ctx,
 			      struct io_buffer_list *bl, unsigned int bgid)
 {
+	/*
+	 * Store buffer group ID and finally mark the list as visible.
+	 * The normal lookup doesn't care about the visibility as we're
+	 * always under the ->uring_lock, but the RCU lookup from mmap does.
+	 */
 	bl->bgid = bgid;
+	smp_store_release(&bl->is_ready, 1);
+
 	if (bgid < BGID_ARRAY)
 		return 0;

@@ -203,18 +219,19 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,

 static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
 {
+	struct io_buffer_list *bl;
 	int i;

-	ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list),
-			     GFP_KERNEL);
-	if (!ctx->io_bl)
+	bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
+	if (!bl)
 		return -ENOMEM;

 	for (i = 0; i < BGID_ARRAY; i++) {
-		INIT_LIST_HEAD(&ctx->io_bl[i].buf_list);
-		ctx->io_bl[i].bgid = i;
+		INIT_LIST_HEAD(&bl[i].buf_list);
+		bl[i].bgid = i;
 	}

+	smp_store_release(&ctx->io_bl, bl);
 	return 0;
 }

@@ -303,7 +320,7 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
 	xa_for_each(&ctx->io_bl_xa, index, bl) {
 		xa_erase(&ctx->io_bl_xa, bl->bgid);
 		__io_remove_buffers(ctx, bl, -1U);
-		kfree(bl);
+		kfree_rcu(bl, rcu);
 	}

 	/*
@@ -497,7 +514,16 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 		INIT_LIST_HEAD(&bl->buf_list);
 		ret = io_buffer_add_list(ctx, bl, p->bgid);
 		if (ret) {
-			kfree(bl);
+			/*
+			 * Doesn't need rcu free as it was never visible, but
+			 * let's keep it consistent throughout. Also can't
+			 * be a lower indexed array group, as adding one
+			 * where lookup failed cannot happen.
+			 */
+			if (p->bgid >= BGID_ARRAY)
+				kfree_rcu(bl, rcu);
+			else
+				WARN_ON_ONCE(1);
 			goto err;
 		}
 	}
@@ -636,6 +662,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	struct io_buffer_list *bl, *free_bl = NULL;
 	int ret;

+	lockdep_assert_held(&ctx->uring_lock);
+
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;

@@ -690,7 +718,7 @@
 		return 0;
 	}

-	kfree(free_bl);
+	kfree_rcu(free_bl, rcu);
 	return ret;
 }

@@ -699,6 +727,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	struct io_uring_buf_reg reg;
 	struct io_buffer_list *bl;

+	lockdep_assert_held(&ctx->uring_lock);
+
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
 	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
@@ -715,7 +745,7 @@
 	__io_remove_buffers(ctx, bl, -1U);
 	if (bl->bgid >= BGID_ARRAY) {
 		xa_erase(&ctx->io_bl_xa, bl->bgid);
-		kfree(bl);
+		kfree_rcu(bl, rcu);
 	}
 	return 0;
 }
@@ -724,7 +754,15 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
 {
	struct io_buffer_list *bl;

-	bl = io_buffer_get_list(ctx, bgid);
+	bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
+
+	/*
+	 * Ensure the list is fully setup. Only strictly needed for RCU lookup
+	 * via mmap, and in that case only for the array indexed groups. For
+	 * the xarray lookups, it's either visible and ready, or not at all.
+	 */
+	if (!smp_load_acquire(&bl->is_ready))
+		return NULL;
 	if (!bl || !bl->is_mmap)
 		return NULL;
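Editor's note: the smp_store_release()/smp_load_acquire() pairs added above follow the standard publish/consume pattern: the writer fully initializes the entry before the releasing store, so a lockless reader that observes the flag set through an acquiring load is also guaranteed to observe the initialized fields. A minimal sketch of that pairing, using hypothetical demo_* names rather than the kernel functions:

#include <linux/types.h>
#include <asm/barrier.h>

/* Hypothetical entry; mirrors the role of io_buffer_list::is_ready. */
struct demo_entry {
	unsigned long	addr;
	unsigned int	bgid;
	u8		is_ready;
};

/* Publisher: initialize everything, then flip the flag with release semantics. */
static void demo_publish(struct demo_entry *e, unsigned long addr, unsigned int bgid)
{
	e->addr = addr;
	e->bgid = bgid;
	/* Orders the stores above before the flag becomes visible to readers. */
	smp_store_release(&e->is_ready, 1);
}

/* Lockless reader: the acquire load pairs with the release store above. */
static unsigned long demo_read(struct demo_entry *e)
{
	if (!smp_load_acquire(&e->is_ready))
		return 0;
	/* Guaranteed to see the addr/bgid values written before is_ready was set. */
	return e->addr;
}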

io_uring/kbuf.h

Lines changed: 3 additions & 0 deletions
@@ -15,6 +15,7 @@ struct io_buffer_list {
 			struct page **buf_pages;
 			struct io_uring_buf_ring *buf_ring;
 		};
+		struct rcu_head rcu;
 	};
 	__u16 bgid;

@@ -28,6 +29,8 @@ struct io_buffer_list {
 	__u8 is_mapped;
 	/* ring mapped provided buffers, but mmap'ed by application */
 	__u8 is_mmap;
+	/* bl is visible from an RCU point of view for lookup */
+	__u8 is_ready;
 };

 struct io_buffer {
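Editor's note on the kbuf.h change: the new rcu_head sits inside the existing union, so it reuses space from members that are no longer needed once the entry is being torn down instead of growing the struct. A generic, hedged sketch of that pattern with made-up demo_* names follows; the one rule is that lockless readers must not touch the unioned members after the object has been queued for freeing, since kfree_rcu() reuses that storage.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical kmalloc()'ed node: 'rcu' overlays a field dead by free time. */
struct demo_node {
	union {
		struct list_head	link;	/* used while the node is live */
		struct rcu_head		rcu;	/* used only for deferred freeing */
	};
	int				id;
};

static void demo_retire(struct demo_node *node)
{
	/*
	 * The node must already be unreachable for new lookups; existing RCU
	 * readers may still hold a pointer, but they only read 'id' and never
	 * 'link', so reusing its space for the rcu_head is safe.
	 */
	list_del(&node->link);
	kfree_rcu(node, rcu);
}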
