Skip to content

Commit 087f997

Browse files
isilence authored and axboe committed
io_uring/memmap: implement mmap for regions
The patch implements mmap for the param region and enables the kernel allocation mode. Internally it uses a fixed mmap offset; however, the user has to use the offset returned in struct io_uring_region_desc::mmap_offset.

Note that mmap doesn't and can't take ->uring_lock, so the region / ring lookup is protected by ->mmap_lock, and it's directly peeking at ctx->param_region. We can't protect io_create_region() with the mmap_lock as it'd deadlock, which is why io_create_region_mmap_safe() initialises the region for us in a temporary variable and then publishes it with the lock taken. It's intentionally decoupled from the main region helpers, and in the future we might want to have a list of active regions, which could then be protected by the ->mmap_lock.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0f1212bd6af7fb39b63514b34fae8948014221d1.1732886067.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 1e21df6 commit 087f997

File tree

3 files changed

+67
-10
lines changed

3 files changed

+67
-10
lines changed

io_uring/memmap.c

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,8 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx,
275275

276276
static int io_region_allocate_pages(struct io_ring_ctx *ctx,
277277
struct io_mapped_region *mr,
278-
struct io_uring_region_desc *reg)
278+
struct io_uring_region_desc *reg,
279+
unsigned long mmap_offset)
279280
{
280281
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
281282
unsigned long size = mr->nr_pages << PAGE_SHIFT;
@@ -290,8 +291,7 @@ static int io_region_allocate_pages(struct io_ring_ctx *ctx,
290291
p = io_mem_alloc_compound(pages, mr->nr_pages, size, gfp);
291292
if (!IS_ERR(p)) {
292293
mr->flags |= IO_REGION_F_SINGLE_REF;
293-
mr->pages = pages;
294-
return 0;
294+
goto done;
295295
}
296296

297297
nr_allocated = alloc_pages_bulk_array_node(gfp, NUMA_NO_NODE,
@@ -302,12 +302,15 @@ static int io_region_allocate_pages(struct io_ring_ctx *ctx,
302302
kvfree(pages);
303303
return -ENOMEM;
304304
}
305+
done:
306+
reg->mmap_offset = mmap_offset;
305307
mr->pages = pages;
306308
return 0;
307309
}
308310

309311
int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
310-
struct io_uring_region_desc *reg)
312+
struct io_uring_region_desc *reg,
313+
unsigned long mmap_offset)
311314
{
312315
int nr_pages, ret;
313316
u64 end;
@@ -341,7 +344,7 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
341344
if (reg->flags & IORING_MEM_REGION_TYPE_USER)
342345
ret = io_region_pin_pages(ctx, mr, reg);
343346
else
344-
ret = io_region_allocate_pages(ctx, mr, reg);
347+
ret = io_region_allocate_pages(ctx, mr, reg, mmap_offset);
345348
if (ret)
346349
goto out_free;
347350

@@ -354,6 +357,40 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
354357
return ret;
355358
}
356359

360+
/*
 * Create a region in a private temporary and only then publish it into @mr.
 *
 * The current contents of @mr are copied into a local io_mapped_region,
 * io_create_region() is run on that local copy, and on success the result is
 * copied back into @mr with ctx->mmap_lock held. On failure @mr is left
 * untouched and the error from io_create_region() is returned.
 *
 * Returns 0 on success or the error code returned by io_create_region().
 */
int io_create_region_mmap_safe(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
361+
struct io_uring_region_desc *reg,
362+
unsigned long mmap_offset)
363+
{
364+
struct io_mapped_region tmp_mr;
365+
int ret;
366+
367+
memcpy(&tmp_mr, mr, sizeof(tmp_mr));
368+
ret = io_create_region(ctx, &tmp_mr, reg, mmap_offset);
369+
if (ret)
370+
return ret;
371+
372+
/*
373+
* Once published, mmap can find it while holding only the ->mmap_lock
374+
* and not ->uring_lock.
375+
*/
376+
guard(mutex)(&ctx->mmap_lock);
377+
memcpy(mr, &tmp_mr, sizeof(tmp_mr));
378+
return 0;
379+
}
380+
381+
/*
 * Check whether @mr may be mmap'ed and return its kernel pointer.
 *
 * Must be called with ctx->mmap_lock held (asserted via lockdep).
 * Returns ERR_PTR(-EINVAL) if the region is not set or if it is flagged
 * IO_REGION_F_USER_PROVIDED; otherwise returns io_region_get_ptr(mr).
 */
static void *io_region_validate_mmap(struct io_ring_ctx *ctx,
382+
struct io_mapped_region *mr)
383+
{
384+
lockdep_assert_held(&ctx->mmap_lock);
385+
386+
if (!io_region_is_set(mr))
387+
return ERR_PTR(-EINVAL);
388+
if (mr->flags & IO_REGION_F_USER_PROVIDED)
389+
return ERR_PTR(-EINVAL);
390+
391+
return io_region_get_ptr(mr);
392+
}
393+
357394
static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
358395
size_t sz)
359396
{
@@ -389,6 +426,8 @@ static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
389426
io_put_bl(ctx, bl);
390427
return ptr;
391428
}
429+
case IORING_MAP_OFF_PARAM_REGION:
430+
return io_region_validate_mmap(ctx, &ctx->param_region);
392431
}
393432

394433
return ERR_PTR(-EINVAL);
@@ -405,6 +444,16 @@ int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
405444

406445
#ifdef CONFIG_MMU
407446

447+
/*
 * Map the pages of @mr into @vma.
 *
 * Marks the VMA VM_DONTEXPAND and asks vm_insert_pages() to insert
 * mr->nr_pages pages from mr->pages starting at vma->vm_start.
 * Returns whatever vm_insert_pages() returns. @ctx is not used here.
 */
static int io_region_mmap(struct io_ring_ctx *ctx,
448+
struct io_mapped_region *mr,
449+
struct vm_area_struct *vma)
450+
{
451+
unsigned long nr_pages = mr->nr_pages;
452+
453+
vm_flags_set(vma, VM_DONTEXPAND);
454+
return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
455+
}
456+
408457
__cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
409458
{
410459
struct io_ring_ctx *ctx = file->private_data;
@@ -429,6 +478,8 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
429478
ctx->n_sqe_pages);
430479
case IORING_OFF_PBUF_RING:
431480
return io_pbuf_mmap(file, vma);
481+
case IORING_MAP_OFF_PARAM_REGION:
482+
return io_region_mmap(ctx, &ctx->param_region, vma);
432483
}
433484

434485
return -EINVAL;

io_uring/memmap.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef IO_URING_MEMMAP_H
22
#define IO_URING_MEMMAP_H
33

4+
#define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL
5+
46
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
57
void io_pages_free(struct page ***pages, int npages);
68
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
@@ -24,7 +26,13 @@ int io_uring_mmap(struct file *file, struct vm_area_struct *vma);
2426

2527
void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr);
2628
int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
27-
struct io_uring_region_desc *reg);
29+
struct io_uring_region_desc *reg,
30+
unsigned long mmap_offset);
31+
32+
int io_create_region_mmap_safe(struct io_ring_ctx *ctx,
33+
struct io_mapped_region *mr,
34+
struct io_uring_region_desc *reg,
35+
unsigned long mmap_offset);
2836

2937
static inline void *io_region_get_ptr(struct io_mapped_region *mr)
3038
{

io_uring/register.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -588,9 +588,6 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
588588
rd_uptr = u64_to_user_ptr(reg.region_uptr);
589589
if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
590590
return -EFAULT;
591-
592-
if (!(rd.flags & IORING_MEM_REGION_TYPE_USER))
593-
return -EINVAL;
594591
if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
595592
return -EINVAL;
596593
if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
@@ -605,7 +602,8 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
605602
!(ctx->flags & IORING_SETUP_R_DISABLED))
606603
return -EINVAL;
607604

608-
ret = io_create_region(ctx, &ctx->param_region, &rd);
605+
ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd,
606+
IORING_MAP_OFF_PARAM_REGION);
609607
if (ret)
610608
return ret;
611609
if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {

0 commit comments

Comments
 (0)