Skip to content

Commit c392cbe

Browse files
committed
io_uring/kbuf: defer release of mapped buffer rings

If a provided buffer ring is setup with IOU_PBUF_RING_MMAP, then the
kernel allocates the memory for it and the application is expected to
mmap(2) this memory. However, io_uring uses remap_pfn_range() for this
operation, so we cannot rely on normal munmap/release on freeing them
for us.

Stash an io_buf_free entry away for each of these, if any, and provide
a helper to free them post ->release().

Cc: [email protected]
Fixes: c56e022 ("io_uring: add support for user mapped provided buffer ring")
Reported-by: Jann Horn <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent edecf16 commit c392cbe

File tree

4 files changed

+46
-5
lines changed

4 files changed

+46
-5
lines changed

include/linux/io_uring_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,9 @@ struct io_ring_ctx {
 
 	struct list_head	io_buffers_cache;
 
+	/* deferred free list, protected by ->uring_lock */
+	struct hlist_head	io_buf_list;
+
 	/* Keep this last, we don't need it for the fast path */
 	struct wait_queue_head	poll_wq;
 	struct io_restriction	restrictions;

io_uring/io_uring.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
+	INIT_HLIST_HEAD(&ctx->io_buf_list);
 	io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
 			    sizeof(struct io_rsrc_node));
 	io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
@@ -2950,6 +2951,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		ctx->mm_account = NULL;
 	}
 	io_rings_free(ctx);
+	io_kbuf_mmap_list_free(ctx);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);

io_uring/kbuf.c

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ struct io_provide_buf {
 	__u16	bid;
 };
 
36+
struct io_buf_free {
37+
struct hlist_node list;
38+
void *mem;
39+
};
+
 static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 							unsigned int bgid)
 {
@@ -223,7 +228,10 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 	if (bl->is_mapped) {
 		i = bl->buf_ring->tail - bl->head;
 		if (bl->is_mmap) {
-			folio_put(virt_to_folio(bl->buf_ring));
+			/*
+			 * io_kbuf_list_free() will free the page(s) at
+			 * ->release() time.
+			 */
 			bl->buf_ring = NULL;
 			bl->is_mmap = 0;
 		} else if (bl->buf_nr_pages) {
@@ -531,18 +539,28 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 		return -EINVAL;
 	}
 
-static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
+static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
+			      struct io_uring_buf_reg *reg,
 			      struct io_buffer_list *bl)
 {
-	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+	struct io_buf_free *ibf;
 	size_t ring_size;
 	void *ptr;
 
 	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
-	ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
+	ptr = io_mem_alloc(ring_size);
 	if (!ptr)
 		return -ENOMEM;
 
+	/* Allocate and store deferred free entry */
+	ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
+	if (!ibf) {
+		io_mem_free(ptr);
+		return -ENOMEM;
+	}
+	ibf->mem = ptr;
+	hlist_add_head(&ibf->list, &ctx->io_buf_list);
+
 	bl->buf_ring = ptr;
 	bl->is_mapped = 1;
 	bl->is_mmap = 1;
@@ -599,7 +617,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	if (!(reg.flags & IOU_PBUF_RING_MMAP))
 		ret = io_pin_pbuf_ring(&reg, bl);
 	else
-		ret = io_alloc_pbuf_ring(&reg, bl);
+		ret = io_alloc_pbuf_ring(ctx, &reg, bl);
 
 	if (!ret) {
 		bl->nr_entries = reg.ring_entries;
@@ -649,3 +667,19 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
 
 	return bl->buf_ring;
 }
+
/*
672+
* Called at or after ->release(), free the mmap'ed buffers that we used
673+
* for memory mapped provided buffer rings.
674+
*/
675+
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
676+
{
677+
struct io_buf_free *ibf;
678+
struct hlist_node *tmp;
679+
680+
hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
681+
hlist_del(&ibf->list);
682+
io_mem_free(ibf->mem);
683+
kfree(ibf);
684+
}
685+
}

io_uring/kbuf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
+
 unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
 
 bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);

0 commit comments

Comments
 (0)