 #include <asm/pat.h>
 #endif
 #include <linux/sched.h>
+#include <linux/delay.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
 #include <linux/mlx5/port.h>
 #include <linux/mlx5/vport.h>
+#include <linux/list.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
 #include <linux/in.h>
@@ -983,6 +985,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 			goto out_uars;
 	}
 
+	INIT_LIST_HEAD(&context->vma_private_list);
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
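The vma_private_list initialized above, and the mlx5_ib_vma_private_data entries used by the functions added below, are declared in the mlx5_ib.h part of this change, which is not shown in this section. A minimal sketch of the assumed definitions (field names follow the usage in this diff; the exact header layout may differ):

#include <linux/list.h>
#include <linux/mm_types.h>

/* Assumed shape of the header additions (not shown in this diff). */
struct mlx5_ib_vma_private_data {
	struct list_head list;		/* linked into the ucontext's vma_private_list */
	struct vm_area_struct *vma;	/* tracked VMA; set to NULL by mlx5_ib_vma_close() */
};

struct mlx5_ib_ucontext {
	/* ... existing members such as db_page_list, db_page_mutex, uuari ... */
	struct list_head vma_private_list;	/* head of the per-context VMA list */
};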
@@ -1086,6 +1089,125 @@ static int get_index(unsigned long offset)
 	return get_arg(offset);
 }
 
+static void mlx5_ib_vma_open(struct vm_area_struct *area)
+{
+	/* vma_open is called when a new VMA is created on top of our VMA. This
+	 * is done through either the mremap flow or split_vma (usually due to
+	 * mlock, madvise, munmap, etc.). We do not support a clone of the VMA,
+	 * as this VMA is strongly hardware related. Therefore we set the
+	 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
+	 * calling us again and trying to do incorrect actions. We assume that
+	 * the original VMA size is exactly a single page, and therefore no
+	 * "splitting" operation will happen to it.
+	 */
+	area->vm_ops = NULL;
+}
+
+static void mlx5_ib_vma_close(struct vm_area_struct *area)
+{
+	struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
+
+	/* It's guaranteed that all VMAs opened on a FD are closed before the
+	 * file itself is closed, therefore no sync is needed with the regular
+	 * closing flow (e.g. mlx5_ib_dealloc_ucontext).
+	 * However, a sync is needed with accessing the vma as part of
+	 * mlx5_ib_disassociate_ucontext.
+	 * The close operation is usually called under mm->mmap_sem except when
+	 * the process is exiting.
+	 * The exiting case is handled explicitly as part of
+	 * mlx5_ib_disassociate_ucontext.
+	 */
+	mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
+
+	/* Set the vma pointer to NULL in the mlx5_ib driver's
+	 * private data, to protect against a race with
+	 * mlx5_ib_disassociate_ucontext().
+	 */
+	mlx5_ib_vma_priv_data->vma = NULL;
+	list_del(&mlx5_ib_vma_priv_data->list);
+	kfree(mlx5_ib_vma_priv_data);
+}
+
+static const struct vm_operations_struct mlx5_ib_vm_ops = {
+	.open = mlx5_ib_vma_open,
+	.close = mlx5_ib_vma_close
+};
+
+static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
+				struct mlx5_ib_ucontext *ctx)
+{
+	struct mlx5_ib_vma_private_data *vma_prv;
+	struct list_head *vma_head = &ctx->vma_private_list;
+
+	vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
+	if (!vma_prv)
+		return -ENOMEM;
+
+	vma_prv->vma = vma;
+	vma->vm_private_data = vma_prv;
+	vma->vm_ops = &mlx5_ib_vm_ops;
+
+	list_add(&vma_prv->list, vma_head);
+
+	return 0;
+}
+
+static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+	int ret;
+	struct vm_area_struct *vma;
+	struct mlx5_ib_vma_private_data *vma_private, *n;
+	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+	struct task_struct *owning_process = NULL;
+	struct mm_struct *owning_mm = NULL;
+
+	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+	if (!owning_process)
+		return;
+
+	owning_mm = get_task_mm(owning_process);
+	if (!owning_mm) {
+		pr_info("no mm, disassociate ucontext is pending task termination\n");
+		while (1) {
+			put_task_struct(owning_process);
+			usleep_range(1000, 2000);
+			owning_process = get_pid_task(ibcontext->tgid,
+						      PIDTYPE_PID);
+			if (!owning_process ||
+			    owning_process->state == TASK_DEAD) {
+				pr_info("disassociate ucontext done, task was terminated\n");
+				/* In case the task was dead, we need to release
+				 * the task struct.
+				 */
+				if (owning_process)
+					put_task_struct(owning_process);
+				return;
+			}
+		}
+	}
+
+	/* Need to protect against a race with closing the vma as part of
+	 * mlx5_ib_vma_close.
+	 */
+	down_read(&owning_mm->mmap_sem);
+	list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
+				 list) {
+		vma = vma_private->vma;
+		ret = zap_vma_ptes(vma, vma->vm_start,
+				   PAGE_SIZE);
+		WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+		/* The context is going to be destroyed; the vma
+		 * must not access its ops any more.
+		 */
+		vma->vm_ops = NULL;
+		list_del(&vma_private->list);
+		kfree(vma_private);
+	}
+	up_read(&owning_mm->mmap_sem);
+	mmput(owning_mm);
+	put_task_struct(owning_process);
+}
+
 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
 {
 	switch (cmd) {
@@ -1101,8 +1223,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
 }
 
 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
-		    struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
+		    struct vm_area_struct *vma,
+		    struct mlx5_ib_ucontext *context)
 {
+	struct mlx5_uuar_info *uuari = &context->uuari;
 	int err;
 	unsigned long idx;
 	phys_addr_t pfn, pa;
@@ -1152,14 +1276,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
 		    vma->vm_start, &pa);
 
-	return 0;
+	return mlx5_ib_set_vma_data(vma, context);
 }
 
 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 {
 	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
-	struct mlx5_uuar_info *uuari = &context->uuari;
 	unsigned long command;
 	phys_addr_t pfn;
 
@@ -1168,7 +1291,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 	case MLX5_IB_MMAP_WC_PAGE:
 	case MLX5_IB_MMAP_NC_PAGE:
 	case MLX5_IB_MMAP_REGULAR_PAGE:
-		return uar_mmap(dev, command, vma, uuari);
+		return uar_mmap(dev, command, vma, context);
 
 	case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
 		return -ENOSYS;
@@ -2428,6 +2551,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
 	}
 
+	dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
+
 	mlx5_ib_internal_fill_odp_caps(dev);
 
 	if (MLX5_CAP_GEN(mdev, imaicl)) {
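Registering this callback lets the uverbs core detach live user contexts from the hardware when the underlying device is removed while applications still hold their uverbs file descriptors open. The sketch below is only a rough illustration of the caller side, with a hypothetical helper name; the real logic lives in the uverbs core (drivers/infiniband/core), is more involved, and is not part of this patch:

/* Hypothetical sketch of the caller side; not the actual uverbs core code. */
static void example_disassociate_one(struct ib_device *ibdev,
				     struct ib_ucontext *ucontext)
{
	/* Let the driver zap the context's mmaps and detach it from the HW,
	 * so the device can go away even though userspace keeps the fd open.
	 */
	if (ibdev->disassociate_ucontext)
		ibdev->disassociate_ucontext(ucontext);
}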