
Commit ef62de3

isilence authored and axboe committed
io_uring/kbuf: use region api for pbuf rings
Convert internal parts of the provided buffer ring management to the
region API. It's the last non-region mapped ring we have, so it also
kills a bunch of now unused memmap.c helpers.

Signed-off-by: Pavel Begunkov <[email protected]>
Link: https://lore.kernel.org/r/6c40cf7beaa648558acd4d84bc0fb3279a35d74b.1732886067.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <[email protected]>
1 parent 90175f3 commit ef62de3

File tree

4 files changed: +73 -240 lines changed
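
At a high level, the patch collapses two mapping paths (user memory pinned via io_pin_pbuf_ring(), kernel memory allocated via io_alloc_pbuf_ring()) into a single region description. A condensed sketch of the new registration flow, drawn from the kbuf.c diff below (error handling trimmed):

	struct io_uring_region_desc rd = { .size = PAGE_ALIGN(ring_size) };

	/* mmap_offset = reg.bgid << IORING_OFF_PBUF_SHIFT */
	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
		/* user memory: the region maps the app's reg.ring_addr */
		rd.user_addr = reg.ring_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	/* else: the region allocates kernel memory the app will mmap() */
	ret = io_create_region_mmap_safe(ctx, &bl->region, &rd, mmap_offset);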

io_uring/kbuf.c

Lines changed: 50 additions & 120 deletions
@@ -351,17 +351,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 
 	if (bl->flags & IOBL_BUF_RING) {
 		i = bl->buf_ring->tail - bl->head;
-		if (bl->buf_nr_pages) {
-			int j;
-
-			if (!(bl->flags & IOBL_MMAP)) {
-				for (j = 0; j < bl->buf_nr_pages; j++)
-					unpin_user_page(bl->buf_pages[j]);
-			}
-			io_pages_unmap(bl->buf_ring, &bl->buf_pages,
-					&bl->buf_nr_pages, bl->flags & IOBL_MMAP);
-			bl->flags &= ~IOBL_MMAP;
-		}
+		io_free_region(ctx, &bl->region);
 		/* make sure it's seen as empty */
 		INIT_LIST_HEAD(&bl->buf_list);
 		bl->flags &= ~IOBL_BUF_RING;
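
Teardown is now symmetric with setup: io_free_region() releases whichever backing the region holds, so this path no longer needs the IOBL_MMAP distinction or the manual unpin/unmap dance. A hedged note on the contract this hunk relies on (io_free_region() itself lives in memmap.c, which is not part of this extract):

	/* assumed contract: unpins user pages or frees kernel pages as
	 * appropriate, then clears the region; safe on an unset region */
	io_free_region(ctx, &bl->region);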
@@ -614,75 +604,14 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	return IOU_OK;
 }
 
-static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
-			    struct io_buffer_list *bl)
-{
-	struct io_uring_buf_ring *br = NULL;
-	struct page **pages;
-	int nr_pages, ret;
-
-	pages = io_pin_pages(reg->ring_addr,
-			     flex_array_size(br, bufs, reg->ring_entries),
-			     &nr_pages);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
-	br = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
-	if (!br) {
-		ret = -ENOMEM;
-		goto error_unpin;
-	}
-
-#ifdef SHM_COLOUR
-	/*
-	 * On platforms that have specific aliasing requirements, SHM_COLOUR
-	 * is set and we must guarantee that the kernel and user side align
-	 * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
-	 * the application mmap's the provided ring buffer. Fail the request
-	 * if we, by chance, don't end up with aligned addresses. The app
-	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
-	 * this transparently.
-	 */
-	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
-		ret = -EINVAL;
-		goto error_unpin;
-	}
-#endif
-	bl->buf_pages = pages;
-	bl->buf_nr_pages = nr_pages;
-	bl->buf_ring = br;
-	bl->flags |= IOBL_BUF_RING;
-	bl->flags &= ~IOBL_MMAP;
-	return 0;
-error_unpin:
-	unpin_user_pages(pages, nr_pages);
-	kvfree(pages);
-	vunmap(br);
-	return ret;
-}
-
-static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
-			      struct io_uring_buf_reg *reg,
-			      struct io_buffer_list *bl)
-{
-	size_t ring_size;
-
-	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
-
-	bl->buf_ring = io_pages_map(&bl->buf_pages, &bl->buf_nr_pages, ring_size);
-	if (IS_ERR(bl->buf_ring)) {
-		bl->buf_ring = NULL;
-		return -ENOMEM;
-	}
-
-	bl->flags |= (IOBL_BUF_RING | IOBL_MMAP);
-	return 0;
-}
-
 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 {
 	struct io_uring_buf_reg reg;
 	struct io_buffer_list *bl, *free_bl = NULL;
+	struct io_uring_region_desc rd;
+	struct io_uring_buf_ring *br;
+	unsigned long mmap_offset;
+	unsigned long ring_size;
 	int ret;
 
 	lockdep_assert_held(&ctx->uring_lock);
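
The two deleted helpers computed the ring size with different expressions: io_pin_pbuf_ring() used flex_array_size(br, bufs, entries) while io_alloc_pbuf_ring() used entries * sizeof(struct io_uring_buf_ring). Both yield the same byte count because, in the UAPI layout (reproduced from include/uapi/linux/io_uring.h for reference), the ring header overlays the first buffer slot:

	struct io_uring_buf {
		__u64	addr;
		__u32	len;
		__u16	bid;
		__u16	resv;
	};

	struct io_uring_buf_ring {
		union {
			struct {
				__u64	resv1;
				__u32	resv2;
				__u16	resv3;
				__u16	tail;	/* overlays bufs[0].resv */
			};
			__DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs);
		};
	};

The new code standardizes on flex_array_size(br, bufs, reg.ring_entries).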
@@ -694,19 +623,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 		return -EINVAL;
 	if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC))
 		return -EINVAL;
-	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
-		if (!reg.ring_addr)
-			return -EFAULT;
-		if (reg.ring_addr & ~PAGE_MASK)
-			return -EINVAL;
-	} else {
-		if (reg.ring_addr)
-			return -EINVAL;
-	}
-
 	if (!is_power_of_2(reg.ring_entries))
 		return -EINVAL;
-
 	/* cannot disambiguate full vs empty due to head/tail size */
 	if (reg.ring_entries >= 65536)
 		return -EINVAL;
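
The dropped ring_addr checks are not lost; they move into the region layer: a user-memory region must come with a page-aligned, non-NULL user_addr, and an IOU_PBUF_RING_MMAP request must not pass one. A hedged sketch of the centralized checks this relies on (the real ones sit in memmap.c's io_create_region(), outside this extract):

	/* assumption: region creation enforces what was removed above */
	if ((reg->flags & IORING_MEM_REGION_TYPE_USER) != !!reg->user_addr)
		return -EFAULT;
	if ((reg->user_addr | reg->size) & ~PAGE_MASK)
		return -EINVAL;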
@@ -722,21 +640,47 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 		return -ENOMEM;
 	}
 
-	if (!(reg.flags & IOU_PBUF_RING_MMAP))
-		ret = io_pin_pbuf_ring(&reg, bl);
-	else
-		ret = io_alloc_pbuf_ring(ctx, &reg, bl);
+	mmap_offset = reg.bgid << IORING_OFF_PBUF_SHIFT;
+	ring_size = flex_array_size(br, bufs, reg.ring_entries);
 
-	if (!ret) {
-		bl->nr_entries = reg.ring_entries;
-		bl->mask = reg.ring_entries - 1;
-		if (reg.flags & IOU_PBUF_RING_INC)
-			bl->flags |= IOBL_INC;
+	memset(&rd, 0, sizeof(rd));
+	rd.size = PAGE_ALIGN(ring_size);
+	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
+		rd.user_addr = reg.ring_addr;
+		rd.flags |= IORING_MEM_REGION_TYPE_USER;
+	}
+	ret = io_create_region_mmap_safe(ctx, &bl->region, &rd, mmap_offset);
+	if (ret)
+		goto fail;
+	br = io_region_get_ptr(&bl->region);
 
-		io_buffer_add_list(ctx, bl, reg.bgid);
-		return 0;
+#ifdef SHM_COLOUR
+	/*
+	 * On platforms that have specific aliasing requirements, SHM_COLOUR
+	 * is set and we must guarantee that the kernel and user side align
+	 * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
+	 * the application mmap's the provided ring buffer. Fail the request
+	 * if we, by chance, don't end up with aligned addresses. The app
+	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
+	 * this transparently.
+	 */
+	if (!(reg.flags & IOU_PBUF_RING_MMAP) &&
+	    ((reg.ring_addr | (unsigned long)br) & (SHM_COLOUR - 1))) {
+		ret = -EINVAL;
+		goto fail;
 	}
+#endif
 
+	bl->nr_entries = reg.ring_entries;
+	bl->mask = reg.ring_entries - 1;
+	bl->flags |= IOBL_BUF_RING;
+	bl->buf_ring = br;
+	if (reg.flags & IOU_PBUF_RING_INC)
+		bl->flags |= IOBL_INC;
+	io_buffer_add_list(ctx, bl, reg.bgid);
+	return 0;
+fail:
+	io_free_region(ctx, &bl->region);
 	kfree(free_bl);
 	return ret;
 }
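
The UAPI served by this function is unchanged. For orientation, a minimal userspace sketch that registers an application-allocated ring through the raw register syscall (liburing's io_uring_register_buf_ring()/io_uring_setup_buf_ring() wrap the same operation; error handling and buffer publishing omitted):

	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/io_uring.h>

	/* ring_fd: an io_uring instance; entries: power of two, < 65536 */
	static int register_pbuf_ring(int ring_fd, unsigned entries,
				      unsigned short bgid)
	{
		struct io_uring_buf_reg reg;
		void *ring;

		/* page-aligned user memory, as the region checks require */
		if (posix_memalign(&ring, 4096,
				   entries * sizeof(struct io_uring_buf)))
			return -1;

		memset(&reg, 0, sizeof(reg));
		reg.ring_addr = (uintptr_t)ring;
		reg.ring_entries = entries;
		reg.bgid = bgid;

		return syscall(__NR_io_uring_register, ring_fd,
			       IORING_REGISTER_PBUF_RING, &reg, 1);
	}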
@@ -794,32 +738,18 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
 	return 0;
 }
 
-struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
-				      unsigned long bgid)
-{
-	struct io_buffer_list *bl;
-
-	bl = xa_load(&ctx->io_bl_xa, bgid);
-	/* must be a mmap'able buffer ring and have pages */
-	if (bl && bl->flags & IOBL_MMAP)
-		return bl;
-
-	return ERR_PTR(-EINVAL);
-}
-
-int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma)
+struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
+					    unsigned int bgid)
 {
-	struct io_ring_ctx *ctx = file->private_data;
-	loff_t pgoff = vma->vm_pgoff << PAGE_SHIFT;
 	struct io_buffer_list *bl;
-	int bgid;
 
 	lockdep_assert_held(&ctx->mmap_lock);
 
-	bgid = (pgoff & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
-	bl = io_pbuf_get_bl(ctx, bgid);
-	if (IS_ERR(bl))
-		return PTR_ERR(bl);
+	bl = xa_load(&ctx->io_bl_xa, bgid);
+	if (!bl || !(bl->flags & IOBL_BUF_RING))
+		return NULL;
+	if (WARN_ON_ONCE(!io_region_is_set(&bl->region)))
+		return NULL;
 
-	return io_uring_mmap_pages(ctx, vma, bl->buf_pages, bl->buf_nr_pages);
+	return &bl->region;
 }
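
io_pbuf_mmap() used to build the vma mapping itself; its replacement just hands the generic mmap code a region to map. The consuming side lives in memmap.c, one of the changed files not shown in this extract, so the following is a hedged sketch of how the lookup presumably plugs in there (names mirror this commit):

	/* in the mmap offset dispatch, IORING_OFF_PBUF_RING case */
	bgid = (pgoff & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
	return io_pbuf_get_region(ctx, bgid);

Returning NULL rather than an ERR_PTR on a missing or non-ring group presumably lets the caller treat every unresolvable offset uniformly.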

io_uring/kbuf.h

Lines changed: 7 additions & 11 deletions
@@ -3,15 +3,13 @@
 #define IOU_KBUF_H
 
 #include <uapi/linux/io_uring.h>
+#include <linux/io_uring_types.h>
 
 enum {
 	/* ring mapped provided buffers */
 	IOBL_BUF_RING	= 1,
-	/* ring mapped provided buffers, but mmap'ed by application */
-	IOBL_MMAP	= 2,
 	/* buffers are consumed incrementally rather than always fully */
-	IOBL_INC	= 4,
-
+	IOBL_INC	= 2,
 };
 
 struct io_buffer_list {
@@ -21,10 +19,7 @@ struct io_buffer_list {
 	 */
 	union {
 		struct list_head buf_list;
-		struct {
-			struct page **buf_pages;
-			struct io_uring_buf_ring *buf_ring;
-		};
+		struct io_uring_buf_ring *buf_ring;
 	};
 	__u16 bgid;
 
@@ -35,6 +30,8 @@ struct io_buffer_list {
 	__u16 mask;
 
 	__u16 flags;
+
+	struct io_mapped_region region;
 };
 
 struct io_buffer {
@@ -81,9 +78,8 @@ void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags);
 
 bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
 
-struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
-				      unsigned long bgid);
-int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma);
+struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
+					    unsigned int bgid);
 
 static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
 {
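
Two header details are worth calling out. IOBL_INC can be renumbered from 4 to 2 because IOBL_* values are kernel-internal and never exposed to userspace. And the new region member sits outside the buf_list/buf_ring union: it must stay valid for mmap and teardown no matter which union member is live. The resulting struct, condensed from this diff (fields not visible in the hunks are elided):

	struct io_buffer_list {
		union {
			struct list_head buf_list;	/* legacy provided buffers */
			struct io_uring_buf_ring *buf_ring; /* ring-mapped mode */
		};
		__u16 bgid;
		/* ... */
		__u16 nr_entries;
		__u16 head;
		__u16 mask;
		__u16 flags;

		struct io_mapped_region region;	/* owns the ring mapping */
	};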
