Commit 81a4058

isilence authored and axboe committed

io_uring: use region api for CQ

Convert internal parts of the CQ/SQ array management to the region API.

Signed-off-by: Pavel Begunkov <[email protected]>
Link: https://lore.kernel.org/r/46fc3c801290d6b1ac16023d78f6b8e685c87fd6.1732886067.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <[email protected]>

1 parent 8078486 commit 81a4058

5 files changed: +36 -102 lines changed
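The conversion follows one pattern across all five files: the open-coded `ring_pages`/`n_ring_pages` bookkeeping is replaced by a region descriptor plus the `io_create_region()`/`io_free_region()`/`io_region_get_ptr()` helpers. A condensed sketch of that pattern, lifted from the io_uring.c hunk below (error paths trimmed; the descriptor type is assumed to be `struct io_uring_region_desc`, which is not spelled out in this diff):

```c
/* Condensed from io_allocate_scq_urings() below; rd's type
 * (struct io_uring_region_desc) is an assumption, not shown
 * in this diff. */
struct io_uring_region_desc rd;
int ret;

memset(&rd, 0, sizeof(rd));
rd.size = PAGE_ALIGN(size);
if (ctx->flags & IORING_SETUP_NO_MMAP) {
	/* userspace supplied the ring memory; pin it rather than allocate */
	rd.user_addr = p->cq_off.user_addr;
	rd.flags |= IORING_MEM_REGION_TYPE_USER;
}
ret = io_create_region(ctx, &ctx->ring_region, &rd, IORING_OFF_CQ_RING);
if (ret)
	return ret;
ctx->rings = io_region_get_ptr(&ctx->ring_region);

/* ...and teardown collapses to a single call: */
io_free_region(ctx, &ctx->ring_region);
```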

include/linux/io_uring_types.h (1 addition, 7 deletions)

```diff
@@ -427,14 +427,8 @@ struct io_ring_ctx {
 	 */
 	struct mutex mmap_lock;
 
-	/*
-	 * If IORING_SETUP_NO_MMAP is used, then the below holds
-	 * the gup'ed pages for the two rings, and the sqes.
-	 */
-	unsigned short n_ring_pages;
-	struct page **ring_pages;
-
 	struct io_mapped_region sq_region;
+	struct io_mapped_region ring_region;
 	/* used for optimised request parameter and wait argument passing */
 	struct io_mapped_region param_region;
 };
```
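Both rings now hang off the same `struct io_mapped_region` type as `sq_region` and `param_region`. The sketch below is the shape implied by how this commit uses it (`mr->pages`, `mr->nr_pages`, `io_region_get_ptr()`, the `IO_REGION_F_VMAP` flag); the authoritative definition lives in the io_uring headers and may differ in detail:

```c
/* Field set inferred from usage in this commit; consult the
 * real definition in the io_uring headers before relying on it. */
struct io_mapped_region {
	struct page	**pages;	/* backing pages fed to vm_insert_pages() */
	void		*ptr;		/* kernel mapping returned by io_region_get_ptr() */
	unsigned	nr_pages;	/* clamped against max_pages in io_region_mmap() */
	unsigned	flags;		/* e.g. IO_REGION_F_VMAP */
};
```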

io_uring/io_uring.c (11 additions, 25 deletions)

```diff
@@ -2634,26 +2634,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
-static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
-			  size_t size)
-{
-	return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
-				size);
-}
-
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
-	if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
-		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages,
-				true);
-	} else {
-		io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
-		ctx->n_ring_pages = 0;
-		vunmap(ctx->rings);
-	}
-
 	io_free_region(ctx, &ctx->sq_region);
-
+	io_free_region(ctx, &ctx->ring_region);
 	ctx->rings = NULL;
 	ctx->sq_sqes = NULL;
 }
@@ -3485,15 +3469,17 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	if (size == SIZE_MAX)
 		return -EOVERFLOW;
 
-	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
-	else
-		rings = io_rings_map(ctx, p->cq_off.user_addr, size);
-
-	if (IS_ERR(rings))
-		return PTR_ERR(rings);
+	memset(&rd, 0, sizeof(rd));
+	rd.size = PAGE_ALIGN(size);
+	if (ctx->flags & IORING_SETUP_NO_MMAP) {
+		rd.user_addr = p->cq_off.user_addr;
+		rd.flags |= IORING_MEM_REGION_TYPE_USER;
+	}
+	ret = io_create_region(ctx, &ctx->ring_region, &rd, IORING_OFF_CQ_RING);
+	if (ret)
+		return ret;
+	ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
 
-	ctx->rings = rings;
 	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
 		ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
 	rings->sq_ring_mask = p->sq_entries - 1;
```
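The `IORING_SETUP_NO_MMAP` branch above covers the case where the application, not the kernel, allocates the rings and publishes them through `io_uring_params` before calling `io_uring_setup(2)`. A hedged userspace sketch of that contract (raw syscall, no liburing; the buffer sizes are placeholder assumptions, real code must size them from the entry counts exactly as the kernel does):

```c
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Sketch: pass pre-allocated, page-aligned memory to the kernel via
 * IORING_SETUP_NO_MMAP. The 64 KiB sizes are placeholder assumptions. */
static int setup_no_mmap(unsigned entries)
{
	struct io_uring_params p;
	void *rings = mmap(NULL, 65536, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	void *sqes = mmap(NULL, 65536, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (rings == MAP_FAILED || sqes == MAP_FAILED)
		return -1;

	memset(&p, 0, sizeof(p));
	p.flags = IORING_SETUP_NO_MMAP;
	p.cq_off.user_addr = (uintptr_t)rings;	/* becomes rd.user_addr above */
	p.sq_off.user_addr = (uintptr_t)sqes;
	return syscall(__NR_io_uring_setup, entries, &p);
}
```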

io_uring/memmap.c (8 additions, 47 deletions)

```diff
@@ -120,18 +120,6 @@ void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
 	*npages = 0;
 }
 
-void io_pages_free(struct page ***pages, int npages)
-{
-	struct page **page_array = *pages;
-
-	if (!page_array)
-		return;
-
-	unpin_user_pages(page_array, npages);
-	kvfree(page_array);
-	*pages = NULL;
-}
-
 struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
 {
 	unsigned long start, end, nr_pages;
@@ -174,34 +162,6 @@ struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
 	return ERR_PTR(ret);
 }
 
-void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
-		     unsigned long uaddr, size_t size)
-{
-	struct page **page_array;
-	unsigned int nr_pages;
-	void *page_addr;
-
-	*npages = 0;
-
-	if (uaddr & (PAGE_SIZE - 1) || !size)
-		return ERR_PTR(-EINVAL);
-
-	nr_pages = 0;
-	page_array = io_pin_pages(uaddr, size, &nr_pages);
-	if (IS_ERR(page_array))
-		return page_array;
-
-	page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
-	if (page_addr) {
-		*pages = page_array;
-		*npages = nr_pages;
-		return page_addr;
-	}
-
-	io_pages_free(&page_array, nr_pages);
-	return ERR_PTR(-ENOMEM);
-}
-
 enum {
 	/* memory was vmap'ed for the kernel, freeing the region vunmap's it */
 	IO_REGION_F_VMAP = 1,
@@ -446,9 +406,10 @@ int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
 
 static int io_region_mmap(struct io_ring_ctx *ctx,
 			  struct io_mapped_region *mr,
-			  struct vm_area_struct *vma)
+			  struct vm_area_struct *vma,
+			  unsigned max_pages)
 {
-	unsigned long nr_pages = mr->nr_pages;
+	unsigned long nr_pages = min(mr->nr_pages, max_pages);
 
 	vm_flags_set(vma, VM_DONTEXPAND);
 	return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
@@ -459,7 +420,7 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	struct io_ring_ctx *ctx = file->private_data;
 	size_t sz = vma->vm_end - vma->vm_start;
 	long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned int npages;
+	unsigned int page_limit;
 	void *ptr;
 
 	guard(mutex)(&ctx->mmap_lock);
@@ -471,14 +432,14 @@
 	switch (offset & IORING_OFF_MMAP_MASK) {
 	case IORING_OFF_SQ_RING:
 	case IORING_OFF_CQ_RING:
-		npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
-		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
+		page_limit = (sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		return io_region_mmap(ctx, &ctx->ring_region, vma, page_limit);
 	case IORING_OFF_SQES:
-		return io_region_mmap(ctx, &ctx->sq_region, vma);
+		return io_region_mmap(ctx, &ctx->sq_region, vma, UINT_MAX);
 	case IORING_OFF_PBUF_RING:
 		return io_pbuf_mmap(file, vma);
 	case IORING_MAP_OFF_PARAM_REGION:
-		return io_region_mmap(ctx, &ctx->param_region, vma);
+		return io_region_mmap(ctx, &ctx->param_region, vma, UINT_MAX);
 	}
 
 	return -EINVAL;
```
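For context, this dispatcher is driven purely by the offset userspace passes to `mmap(2)`; the calling convention is unchanged by the region conversion. Mapping the CQ ring, for example, looks like the usual:

```c
#include <sys/mman.h>
#include <linux/io_uring.h>

/* Map the CQ ring of an already-created io_uring fd. cq_sz is assumed
 * to be precomputed from io_uring_params (cq_off.cqes plus the CQE
 * array), as in any standard io_uring setup. */
static void *map_cq_ring(int ring_fd, size_t cq_sz)
{
	return mmap(NULL, cq_sz, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_CQ_RING);
}
```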

io_uring/memmap.h (0 additions, 4 deletions)

```diff
@@ -4,7 +4,6 @@
 #define IORING_MAP_OFF_PARAM_REGION	0x20000000ULL
 
 struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
-void io_pages_free(struct page ***pages, int npages);
 int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
 			struct page **pages, int npages);
 
@@ -13,9 +12,6 @@ void *io_pages_map(struct page ***out_pages, unsigned short *npages,
 void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
 		    bool put_pages);
 
-void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
-		     unsigned long uaddr, size_t size);
-
 #ifndef CONFIG_MMU
 unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
 #endif
```

io_uring/register.c (16 additions, 19 deletions)

```diff
@@ -367,26 +367,19 @@ static int io_register_clock(struct io_ring_ctx *ctx,
  * either mapping or freeing.
  */
 struct io_ring_ctx_rings {
-	unsigned short n_ring_pages;
-	struct page **ring_pages;
 	struct io_rings *rings;
-
 	struct io_uring_sqe *sq_sqes;
+
 	struct io_mapped_region sq_region;
+	struct io_mapped_region ring_region;
 };
 
 static void io_register_free_rings(struct io_ring_ctx *ctx,
 				   struct io_uring_params *p,
 				   struct io_ring_ctx_rings *r)
 {
-	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
-		io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
-				true);
-	} else {
-		io_pages_free(&r->ring_pages, r->n_ring_pages);
-		vunmap(r->rings);
-	}
 	io_free_region(ctx, &r->sq_region);
+	io_free_region(ctx, &r->ring_region);
 }
 
 #define swap_old(ctx, o, n, field) \
@@ -439,13 +432,18 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	if (size == SIZE_MAX)
 		return -EOVERFLOW;
 
-	if (!(p.flags & IORING_SETUP_NO_MMAP))
-		n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
-	else
-		n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
-					 p.cq_off.user_addr, size);
-	if (IS_ERR(n.rings))
-		return PTR_ERR(n.rings);
+	memset(&rd, 0, sizeof(rd));
+	rd.size = PAGE_ALIGN(size);
+	if (p.flags & IORING_SETUP_NO_MMAP) {
+		rd.user_addr = p.cq_off.user_addr;
+		rd.flags |= IORING_MEM_REGION_TYPE_USER;
+	}
+	ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
+	if (ret) {
+		io_register_free_rings(ctx, &p, &n);
+		return ret;
+	}
+	n.rings = io_region_get_ptr(&n.ring_region);
 
 	n.rings->sq_ring_mask = p.sq_entries - 1;
 	n.rings->cq_ring_mask = p.cq_entries - 1;
@@ -555,8 +553,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 
 	ctx->rings = n.rings;
 	ctx->sq_sqes = n.sq_sqes;
-	swap_old(ctx, o, n, n_ring_pages);
-	swap_old(ctx, o, n, ring_pages);
+	swap_old(ctx, o, n, ring_region);
 	swap_old(ctx, o, n, sq_region);
 	to_free = &o;
 	ret = 0;
```
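`swap_old()` is defined earlier in register.c (only its first line appears as context above). Its expansion is assumed here to be the usual stash-and-replace, so the old region lands in `o` and is freed via `to_free`:

```c
/* Assumed expansion of swap_old(); check io_uring/register.c for
 * the authoritative definition. */
#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)
```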
