Skip to content

Commit 7c2344c

Browse files
Maor Gottlieb authored and dledford committed
IB/mlx5: Implements disassociate_ucontext API
Implements the IB core disassociate_ucontext API. The driver detaches the HW resources for a given user context to prevent a dependency between application termination and device disconnect. This is done by managing the VMAs that were mapped to the HW bars, such as the doorbell and blueflame regions. When a detach is needed, these VMAs are remapped to an arbitrary kernel page returned by the zap API.

Signed-off-by: Maor Gottlieb <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
1 parent 28d6137 commit 7c2344c

File tree

2 files changed

+135
-4
lines changed

2 files changed

+135
-4
lines changed

drivers/infiniband/hw/mlx5/main.c

Lines changed: 129 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,13 @@
4242
#include <asm/pat.h>
4343
#endif
4444
#include <linux/sched.h>
45+
#include <linux/delay.h>
4546
#include <rdma/ib_user_verbs.h>
4647
#include <rdma/ib_addr.h>
4748
#include <rdma/ib_cache.h>
4849
#include <linux/mlx5/port.h>
4950
#include <linux/mlx5/vport.h>
51+
#include <linux/list.h>
5052
#include <rdma/ib_smi.h>
5153
#include <rdma/ib_umem.h>
5254
#include <linux/in.h>
@@ -983,6 +985,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
983985
goto out_uars;
984986
}
985987

988+
INIT_LIST_HEAD(&context->vma_private_list);
986989
INIT_LIST_HEAD(&context->db_page_list);
987990
mutex_init(&context->db_page_mutex);
988991

@@ -1086,6 +1089,125 @@ static int get_index(unsigned long offset)
10861089
return get_arg(offset);
10871090
}
10881091

1092+
static void mlx5_ib_vma_open(struct vm_area_struct *area)
{
	/* vma_open is called when a new VMA is created on top of our VMA.
	 * This is done through either the mremap flow or split_vma (usually
	 * due to mlock, madvise, munmap, etc.). We do not support a clone of
	 * the VMA, as this VMA is strongly hardware related. Therefore we set
	 * the vm_ops of the newly created/cloned VMA to NULL, to prevent it
	 * from calling us again and trying to do incorrect actions. We assume
	 * that the original VMA size is exactly a single page, so no
	 * "splitting" operation will ever happen to it.
	 */
	area->vm_ops = NULL;
}
1105+
1106+
static void mlx5_ib_vma_close(struct vm_area_struct *area)
1107+
{
1108+
struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
1109+
1110+
/* It's guaranteed that all VMAs opened on a FD are closed before the
1111+
* file itself is closed, therefore no sync is needed with the regular
1112+
* closing flow. (e.g. mlx5 ib_dealloc_ucontext)
1113+
* However need a sync with accessing the vma as part of
1114+
* mlx5_ib_disassociate_ucontext.
1115+
* The close operation is usually called under mm->mmap_sem except when
1116+
* process is exiting.
1117+
* The exiting case is handled explicitly as part of
1118+
* mlx5_ib_disassociate_ucontext.
1119+
*/
1120+
mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
1121+
1122+
/* setting the vma context pointer to null in the mlx5_ib driver's
1123+
* private data, to protect a race condition in
1124+
* mlx5_ib_disassociate_ucontext().
1125+
*/
1126+
mlx5_ib_vma_priv_data->vma = NULL;
1127+
list_del(&mlx5_ib_vma_priv_data->list);
1128+
kfree(mlx5_ib_vma_priv_data);
1129+
}
1130+
1131+
/* VM operations installed on each VMA tracked by mlx5_ib_set_vma_data();
 * .open blocks VMA cloning/splitting and .close tears down our tracking.
 */
static const struct vm_operations_struct mlx5_ib_vm_ops = {
	.open = mlx5_ib_vma_open,
	.close = mlx5_ib_vma_close
};
1135+
1136+
static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
1137+
struct mlx5_ib_ucontext *ctx)
1138+
{
1139+
struct mlx5_ib_vma_private_data *vma_prv;
1140+
struct list_head *vma_head = &ctx->vma_private_list;
1141+
1142+
vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
1143+
if (!vma_prv)
1144+
return -ENOMEM;
1145+
1146+
vma_prv->vma = vma;
1147+
vma->vm_private_data = vma_prv;
1148+
vma->vm_ops = &mlx5_ib_vm_ops;
1149+
1150+
list_add(&vma_prv->list, vma_head);
1151+
1152+
return 0;
1153+
}
1154+
1155+
/* Detach the HW resources of @ibcontext from the owning process so device
 * removal does not have to wait for application termination: every VMA the
 * context mapped to HW bars is zapped (PTEs cleared) and its tracking entry
 * freed. If the owning process is already exiting (its mm is gone), poll
 * until the task is dead — mlx5_ib_vma_close cannot race with us in that
 * case, since close is not called under mmap_sem while exiting.
 */
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
	int ret;
	struct vm_area_struct *vma;
	struct mlx5_ib_vma_private_data *vma_private, *n;
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	struct task_struct *owning_process = NULL;
	struct mm_struct *owning_mm = NULL;

	/* Take a reference on the owning task; bail if it is already gone. */
	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
	if (!owning_process)
		return;

	owning_mm = get_task_mm(owning_process);
	if (!owning_mm) {
		/* The task still exists but its mm has been released: it is
		 * mid-exit. Wait for it to reach TASK_DEAD (or disappear),
		 * re-taking the task reference on each iteration.
		 */
		pr_info("no mm, disassociate ucontext is pending task termination\n");
		while (1) {
			put_task_struct(owning_process);
			usleep_range(1000, 2000);
			owning_process = get_pid_task(ibcontext->tgid,
						      PIDTYPE_PID);
			if (!owning_process ||
			    owning_process->state == TASK_DEAD) {
				pr_info("disassociate ucontext done, task was terminated\n");
				/* in case task was dead need to release the
				 * task struct.
				 */
				if (owning_process)
					put_task_struct(owning_process);
				return;
			}
		}
	}

	/* need to protect from a race on closing the vma as part of
	 * mlx5_ib_vma_close.
	 */
	down_read(&owning_mm->mmap_sem);
	list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
				 list) {
		vma = vma_private->vma;
		/* Clear the page-table entries of the (single-page) mapping;
		 * subsequent userspace access faults instead of touching HW.
		 */
		ret = zap_vma_ptes(vma, vma->vm_start,
				   PAGE_SIZE);
		WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
		/* context going to be destroyed, should
		 * not access ops any more.
		 */
		vma->vm_ops = NULL;
		list_del(&vma_private->list);
		kfree(vma_private);
	}
	up_read(&owning_mm->mmap_sem);
	/* Drop the mm and task references taken above. */
	mmput(owning_mm);
	put_task_struct(owning_process);
}
1210+
10891211
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
10901212
{
10911213
switch (cmd) {
@@ -1101,8 +1223,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
11011223
}
11021224

11031225
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1104-
struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
1226+
struct vm_area_struct *vma,
1227+
struct mlx5_ib_ucontext *context)
11051228
{
1229+
struct mlx5_uuar_info *uuari = &context->uuari;
11061230
int err;
11071231
unsigned long idx;
11081232
phys_addr_t pfn, pa;
@@ -1152,14 +1276,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
11521276
mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
11531277
vma->vm_start, &pa);
11541278

1155-
return 0;
1279+
return mlx5_ib_set_vma_data(vma, context);
11561280
}
11571281

11581282
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
11591283
{
11601284
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
11611285
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1162-
struct mlx5_uuar_info *uuari = &context->uuari;
11631286
unsigned long command;
11641287
phys_addr_t pfn;
11651288

@@ -1168,7 +1291,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
11681291
case MLX5_IB_MMAP_WC_PAGE:
11691292
case MLX5_IB_MMAP_NC_PAGE:
11701293
case MLX5_IB_MMAP_REGULAR_PAGE:
1171-
return uar_mmap(dev, command, vma, uuari);
1294+
return uar_mmap(dev, command, vma, context);
11721295

11731296
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
11741297
return -ENOSYS;
@@ -2428,6 +2551,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
24282551
dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
24292552
}
24302553

2554+
dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
2555+
24312556
mlx5_ib_internal_fill_odp_caps(dev);
24322557

24332558
if (MLX5_CAP_GEN(mdev, imaicl)) {

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@ enum {
105105
MLX5_CQE_VERSION_V1,
106106
};
107107

108+
/* Per-VMA tracking entry for user mappings of HW resources; linked on
 * mlx5_ib_ucontext.vma_private_list so the VMAs can be zapped when the
 * ucontext is disassociated. @vma is set to NULL by the VMA close hook
 * to flag that the mapping is already gone.
 */
struct mlx5_ib_vma_private_data {
	struct list_head list;
	struct vm_area_struct *vma;
};
112+
108113
struct mlx5_ib_ucontext {
109114
struct ib_ucontext ibucontext;
110115
struct list_head db_page_list;
@@ -116,6 +121,7 @@ struct mlx5_ib_ucontext {
116121
u8 cqe_version;
117122
/* Transport Domain number */
118123
u32 tdn;
124+
struct list_head vma_private_list;
119125
};
120126

121127
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)

0 commit comments

Comments
 (0)