Skip to content

Commit 829ed62

Browse files
Steve Sistare authored and jgunthorpe committed
iommufd: Add IOMMU_IOAS_CHANGE_PROCESS
Add an ioctl that updates all DMA mappings to reflect the current process. Change the mm and transfer locked memory accounting from old to current mm. This will be used for live update, allowing an old process to hand the iommufd device descriptor to a new process. The new process calls the ioctl. IOMMU_IOAS_CHANGE_PROCESS only supports DMA mappings created with IOMMU_IOAS_MAP_FILE, because the kernel metadata for such mappings does not depend on the userland VA of the pages (which is different in the new process). IOMMU_IOAS_CHANGE_PROCESS fails if other types of mappings are present. This is a revised version of code originally provided by Jason. Link: https://patch.msgid.link/r/[email protected] Suggested-by: Jason Gunthorpe <[email protected]> Signed-off-by: Steve Sistare <[email protected]> Reviewed-by: Jason Gunthorpe <[email protected]> Reviewed-by: Kevin Tian <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 051ae5a commit 829ed62

File tree

5 files changed

+174
-0
lines changed

5 files changed

+174
-0
lines changed

drivers/iommu/iommufd/io_pagetable.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ enum {
173173
IOPT_PAGES_ACCOUNT_NONE = 0,
174174
IOPT_PAGES_ACCOUNT_USER = 1,
175175
IOPT_PAGES_ACCOUNT_MM = 2,
176+
IOPT_PAGES_ACCOUNT_MODE_NUM = 3,
176177
};
177178

178179
enum iopt_address_type {

drivers/iommu/iommufd/ioas.c

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,153 @@ static int iommufd_take_all_iova_rwsem(struct iommufd_ctx *ictx,
439439
return 0;
440440
}
441441

442+
static bool need_charge_update(struct iopt_pages *pages)
443+
{
444+
switch (pages->account_mode) {
445+
case IOPT_PAGES_ACCOUNT_NONE:
446+
return false;
447+
case IOPT_PAGES_ACCOUNT_MM:
448+
return pages->source_mm != current->mm;
449+
case IOPT_PAGES_ACCOUNT_USER:
450+
/*
451+
* Update when mm changes because it also accounts
452+
* in mm->pinned_vm.
453+
*/
454+
return (pages->source_user != current_user()) ||
455+
(pages->source_mm != current->mm);
456+
}
457+
return true;
458+
}
459+
460+
static int charge_current(unsigned long *npinned)
461+
{
462+
struct iopt_pages tmp = {
463+
.source_mm = current->mm,
464+
.source_task = current->group_leader,
465+
.source_user = current_user(),
466+
};
467+
unsigned int account_mode;
468+
int rc;
469+
470+
for (account_mode = 0; account_mode != IOPT_PAGES_ACCOUNT_MODE_NUM;
471+
account_mode++) {
472+
if (!npinned[account_mode])
473+
continue;
474+
475+
tmp.account_mode = account_mode;
476+
rc = iopt_pages_update_pinned(&tmp, npinned[account_mode], true,
477+
NULL);
478+
if (rc)
479+
goto err_undo;
480+
}
481+
return 0;
482+
483+
err_undo:
484+
while (account_mode != 0) {
485+
account_mode--;
486+
if (!npinned[account_mode])
487+
continue;
488+
tmp.account_mode = account_mode;
489+
iopt_pages_update_pinned(&tmp, npinned[account_mode], false,
490+
NULL);
491+
}
492+
return rc;
493+
}
494+
495+
static void change_mm(struct iopt_pages *pages)
496+
{
497+
struct task_struct *old_task = pages->source_task;
498+
struct user_struct *old_user = pages->source_user;
499+
struct mm_struct *old_mm = pages->source_mm;
500+
501+
pages->source_mm = current->mm;
502+
mmgrab(pages->source_mm);
503+
mmdrop(old_mm);
504+
505+
pages->source_task = current->group_leader;
506+
get_task_struct(pages->source_task);
507+
put_task_struct(old_task);
508+
509+
pages->source_user = get_uid(current_user());
510+
free_uid(old_user);
511+
}
512+
513+
/*
 * Iterate over every area of every IOAS stored in the xarray @_xa.
 *
 * @_xa:    xarray holding the IOAS objects
 * @_index: unsigned long cursor handed to xa_for_each()
 * @_ioas:  set to each IOAS entry in turn
 * @_area:  set to each area of the current IOAS over the whole
 *          0..ULONG_MAX range
 *
 * The expansion is xa_for_each() wrapped around an inner for loop, so a
 * "break" in the body only leaves the inner (per-area) loop. Every macro
 * argument is parenthesized where it is used so that an argument that is an
 * expression rather than a plain identifier still binds as intended.
 */
#define for_each_ioas_area(_xa, _index, _ioas, _area) \
	xa_for_each((_xa), (_index), (_ioas)) \
		for ((_area) = iopt_area_iter_first(&(_ioas)->iopt, 0, ULONG_MAX); \
		     (_area); \
		     (_area) = iopt_area_iter_next((_area), 0, ULONG_MAX))
518+
519+
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd)
520+
{
521+
struct iommu_ioas_change_process *cmd = ucmd->cmd;
522+
struct iommufd_ctx *ictx = ucmd->ictx;
523+
unsigned long all_npinned[IOPT_PAGES_ACCOUNT_MODE_NUM] = {};
524+
struct iommufd_ioas *ioas;
525+
struct iopt_area *area;
526+
struct iopt_pages *pages;
527+
struct xarray ioas_list;
528+
unsigned long index;
529+
int rc;
530+
531+
if (cmd->__reserved)
532+
return -EOPNOTSUPP;
533+
534+
xa_init(&ioas_list);
535+
rc = iommufd_take_all_iova_rwsem(ictx, &ioas_list);
536+
if (rc)
537+
return rc;
538+
539+
for_each_ioas_area(&ioas_list, index, ioas, area) {
540+
if (area->pages->type != IOPT_ADDRESS_FILE) {
541+
rc = -EINVAL;
542+
goto out;
543+
}
544+
}
545+
546+
/*
547+
* Count last_pinned pages, then clear it to avoid double counting
548+
* if the same iopt_pages is visited multiple times in this loop.
549+
* Since we are under all the locks, npinned == last_npinned, so we
550+
* can easily restore last_npinned before we return.
551+
*/
552+
for_each_ioas_area(&ioas_list, index, ioas, area) {
553+
pages = area->pages;
554+
555+
if (need_charge_update(pages)) {
556+
all_npinned[pages->account_mode] += pages->last_npinned;
557+
pages->last_npinned = 0;
558+
}
559+
}
560+
561+
rc = charge_current(all_npinned);
562+
563+
if (rc) {
564+
/* Charge failed. Fix last_npinned and bail. */
565+
for_each_ioas_area(&ioas_list, index, ioas, area)
566+
area->pages->last_npinned = area->pages->npinned;
567+
goto out;
568+
}
569+
570+
for_each_ioas_area(&ioas_list, index, ioas, area) {
571+
pages = area->pages;
572+
573+
/* Uncharge the old one (which also restores last_npinned) */
574+
if (need_charge_update(pages)) {
575+
int r = iopt_pages_update_pinned(pages, pages->npinned,
576+
false, NULL);
577+
578+
if (WARN_ON(r))
579+
rc = r;
580+
}
581+
change_mm(pages);
582+
}
583+
584+
out:
585+
iommufd_release_all_iova_rwsem(ictx, &ioas_list);
586+
return rc;
587+
}
588+
442589
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
443590
struct iommufd_ctx *ictx)
444591
{

drivers/iommu/iommufd/iommufd_private.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
255255
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
256256
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
257257
int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
258+
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
258259
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
259260
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
260261
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);

drivers/iommu/iommufd/main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
349349
struct iommu_ioas_alloc, out_ioas_id),
350350
IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
351351
struct iommu_ioas_allow_iovas, allowed_iovas),
352+
IOCTL_OP(IOMMU_IOAS_CHANGE_PROCESS, iommufd_ioas_change_process,
353+
struct iommu_ioas_change_process, __reserved),
352354
IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy,
353355
src_iova),
354356
IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,

include/uapi/linux/iommufd.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ enum {
5454
IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
5555
IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
5656
IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
57+
IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
5758
};
5859

5960
/**
@@ -972,4 +973,26 @@ struct iommu_vdevice_alloc {
972973
__aligned_u64 virt_id;
973974
};
974975
#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
976+
977+
/**
978+
* struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
979+
* @size: sizeof(struct iommu_ioas_change_process)
980+
* @__reserved: Must be 0
981+
*
982+
* This transfers pinned memory counts for every memory map in every IOAS
983+
* in the context to the current process. This only supports maps created
984+
* with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
985+
* If the ioctl returns a failure status, then nothing is changed.
986+
*
987+
* This API is useful for transferring operation of a device from one process
988+
* to another, such as during userland live update.
989+
*/
990+
struct iommu_ioas_change_process {
991+
__u32 size;
992+
__u32 __reserved;
993+
};
994+
995+
#define IOMMU_IOAS_CHANGE_PROCESS \
996+
_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
997+
975998
#endif

0 commit comments

Comments
 (0)