
Commit 78fda3d

isilence authored and axboe committed
io_uring/kbuf: use mmap_lock to sync with mmap
A preparation / cleanup patch simplifying the buf ring - mmap synchronisation. Instead of relying on RCU, which is trickier, do it by grabbing the mmap_lock when anyone tries to publish or remove a registered buffer to / from ->io_bl_xa.

Modifications of the xarray should always be protected by both ->uring_lock and ->mmap_lock, while lookups should hold either of them. While a struct io_buffer_list is in the xarray, the mmap related fields like ->flags and ->buf_pages should stay stable.

Signed-off-by: Pavel Begunkov <[email protected]>
Link: https://lore.kernel.org/r/af13bde56ee1a26bcaefaa9aad37a9ea318a590e.1732886067.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <[email protected]>
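The scheme relies on the scope-based guard(mutex) and scoped_guard(mutex, ...) helpers from <linux/cleanup.h>, which unlock automatically when the enclosing scope ends. Below is a minimal, illustrative sketch of the locking rule stated above; the function names are made up, and the bodies simply mirror io_buffer_add_list() and io_pbuf_get_bl() from the diff that follows rather than being a definitive implementation.

/* Illustrative sketch only, not part of the patch: publishing an entry
 * takes ->mmap_lock on top of the ->uring_lock the caller already holds,
 * while the mmap-side lookup relies on ->mmap_lock alone. */
static int example_publish_bl(struct io_ring_ctx *ctx,
                              struct io_buffer_list *bl, unsigned int bgid)
{
        /* dropped automatically at function exit by guard() */
        guard(mutex)(&ctx->mmap_lock);
        return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}

static struct io_buffer_list *example_lookup_from_mmap(struct io_ring_ctx *ctx,
                                                        unsigned long bgid)
{
        /* a holder of ->mmap_lock is guaranteed the entry won't be
         * erased from ->io_bl_xa underneath it */
        lockdep_assert_held(&ctx->mmap_lock);
        return xa_load(&ctx->io_bl_xa, bgid);
}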
1 parent 81a4058 commit 78fda3d

3 files changed: +29 −33 lines changed

include/linux/io_uring_types.h

Lines changed: 5 additions & 0 deletions
@@ -294,6 +294,11 @@ struct io_ring_ctx {
 
         struct io_submit_state          submit_state;
 
+        /*
+         * Modifications are protected by ->uring_lock and ->mmap_lock.
+         * The flags, buf_pages and buf_nr_pages fields should be stable
+         * once published.
+         */
         struct xarray                   io_bl_xa;
 
         struct io_hash_table            cancel_table;

io_uring/kbuf.c

Lines changed: 24 additions & 32 deletions
@@ -45,10 +45,11 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
         /*
          * Store buffer group ID and finally mark the list as visible.
          * The normal lookup doesn't care about the visibility as we're
-         * always under the ->uring_lock, but the RCU lookup from mmap does.
+         * always under the ->uring_lock, but lookups from mmap do.
          */
         bl->bgid = bgid;
         atomic_set(&bl->refs, 1);
+        guard(mutex)(&ctx->mmap_lock);
         return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
 }
 
@@ -388,7 +389,7 @@ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
 {
         if (atomic_dec_and_test(&bl->refs)) {
                 __io_remove_buffers(ctx, bl, -1U);
-                kfree_rcu(bl, rcu);
+                kfree(bl);
         }
 }
 
@@ -397,10 +398,17 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
         struct io_buffer_list *bl;
         struct list_head *item, *tmp;
         struct io_buffer *buf;
-        unsigned long index;
 
-        xa_for_each(&ctx->io_bl_xa, index, bl) {
-                xa_erase(&ctx->io_bl_xa, bl->bgid);
+        while (1) {
+                unsigned long index = 0;
+
+                scoped_guard(mutex, &ctx->mmap_lock) {
+                        bl = xa_find(&ctx->io_bl_xa, &index, ULONG_MAX, XA_PRESENT);
+                        if (bl)
+                                xa_erase(&ctx->io_bl_xa, bl->bgid);
+                }
+                if (!bl)
+                        break;
                 io_put_bl(ctx, bl);
         }
 
@@ -589,11 +597,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
                 INIT_LIST_HEAD(&bl->buf_list);
                 ret = io_buffer_add_list(ctx, bl, p->bgid);
                 if (ret) {
-                        /*
-                         * Doesn't need rcu free as it was never visible, but
-                         * let's keep it consistent throughout.
-                         */
-                        kfree_rcu(bl, rcu);
+                        kfree(bl);
                         goto err;
                 }
         }
@@ -736,7 +740,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
                 return 0;
         }
 
-        kfree_rcu(free_bl, rcu);
+        kfree(free_bl);
         return ret;
 }
 
@@ -760,7 +764,9 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
         if (!(bl->flags & IOBL_BUF_RING))
                 return -EINVAL;
 
-        xa_erase(&ctx->io_bl_xa, bl->bgid);
+        scoped_guard(mutex, &ctx->mmap_lock)
+                xa_erase(&ctx->io_bl_xa, bl->bgid);
+
         io_put_bl(ctx, bl);
         return 0;
 }
@@ -795,29 +801,13 @@ struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
                                       unsigned long bgid)
 {
         struct io_buffer_list *bl;
-        bool ret;
 
-        /*
-         * We have to be a bit careful here - we're inside mmap and cannot grab
-         * the uring_lock. This means the buffer_list could be simultaneously
-         * going away, if someone is trying to be sneaky. Look it up under rcu
-         * so we know it's not going away, and attempt to grab a reference to
-         * it. If the ref is already zero, then fail the mapping. If successful,
-         * the caller will call io_put_bl() to drop the the reference at at the
-         * end. This may then safely free the buffer_list (and drop the pages)
-         * at that point, vm_insert_pages() would've already grabbed the
-         * necessary vma references.
-         */
-        rcu_read_lock();
         bl = xa_load(&ctx->io_bl_xa, bgid);
         /* must be a mmap'able buffer ring and have pages */
-        ret = false;
-        if (bl && bl->flags & IOBL_MMAP)
-                ret = atomic_inc_not_zero(&bl->refs);
-        rcu_read_unlock();
-
-        if (ret)
-                return bl;
+        if (bl && bl->flags & IOBL_MMAP) {
+                if (atomic_inc_not_zero(&bl->refs))
+                        return bl;
+        }
 
         return ERR_PTR(-EINVAL);
 }
@@ -829,6 +819,8 @@ int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma)
         struct io_buffer_list *bl;
         int bgid, ret;
 
+        lockdep_assert_held(&ctx->mmap_lock);
+
         bgid = (pgoff & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
         bl = io_pbuf_get_bl(ctx, bgid);
         if (IS_ERR(bl))

io_uring/kbuf.h

Lines changed: 0 additions & 1 deletion
@@ -25,7 +25,6 @@ struct io_buffer_list {
                         struct page **buf_pages;
                         struct io_uring_buf_ring *buf_ring;
                 };
-                struct rcu_head rcu;
         };
         __u16 bgid;
 