Commit 6aec21f

Haggai Eran authored and Roland Dreier committed
IB/mlx5: Page faults handling infrastructure
* Refactor MR registration and cleanup, and fix reg_pages accounting.
* Create a work queue to handle page fault events in a kthread context.
* Register a fault handler to get events from the core for each QP.

The registered fault handler is empty in this patch, and only a later
patch implements it.

Signed-off-by: Sagi Grimberg <[email protected]>
Signed-off-by: Shachar Raindel <[email protected]>
Signed-off-by: Haggai Eran <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>
1 parent 832a6b0 commit 6aec21f

File tree

6 files changed (+294, -22 lines)


drivers/infiniband/hw/mlx5/main.c

Lines changed: 27 additions & 4 deletions
@@ -864,7 +864,7 @@ static ssize_t show_reg_pages(struct device *device,
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 
-	return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages);
+	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 }
 
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -1389,16 +1389,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		goto err_eqs;
 
 	mutex_init(&dev->cap_mask_mutex);
-	spin_lock_init(&dev->mr_lock);
 
 	err = create_dev_resources(&dev->devr);
 	if (err)
 		goto err_eqs;
 
-	err = ib_register_device(&dev->ib_dev, NULL);
+	err = mlx5_ib_odp_init_one(dev);
 	if (err)
 		goto err_rsrc;
 
+	err = ib_register_device(&dev->ib_dev, NULL);
+	if (err)
+		goto err_odp;
+
 	err = create_umr_res(dev);
 	if (err)
 		goto err_dev;
@@ -1420,6 +1423,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 err_dev:
 	ib_unregister_device(&dev->ib_dev);
 
+err_odp:
+	mlx5_ib_odp_remove_one(dev);
+
 err_rsrc:
 	destroy_dev_resources(&dev->devr);
 
@@ -1435,8 +1441,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
 	struct mlx5_ib_dev *dev = context;
+
 	ib_unregister_device(&dev->ib_dev);
 	destroy_umrc_res(dev);
+	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
 	free_comp_eqs(dev);
 	ib_dealloc_device(&dev->ib_dev);
@@ -1450,15 +1458,30 @@ static struct mlx5_interface mlx5_ib_interface = {
 
 static int __init mlx5_ib_init(void)
 {
+	int err;
+
 	if (deprecated_prof_sel != 2)
 		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
 
-	return mlx5_register_interface(&mlx5_ib_interface);
+	err = mlx5_ib_odp_init();
+	if (err)
+		return err;
+
+	err = mlx5_register_interface(&mlx5_ib_interface);
+	if (err)
+		goto clean_odp;
+
+	return err;
+
+clean_odp:
+	mlx5_ib_odp_cleanup();
+	return err;
 }
 
 static void __exit mlx5_ib_cleanup(void)
 {
 	mlx5_unregister_interface(&mlx5_ib_interface);
+	mlx5_ib_odp_cleanup();
 }
 
 module_init(mlx5_ib_init);
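
The workqueue behind mlx5_ib_odp_init()/mlx5_ib_odp_cleanup() is expected to live in odp.c, one of the six changed files not shown in this excerpt. A minimal sketch of how those two functions could back the mlx5_ib_page_fault_wq declared in mlx5_ib.h; the workqueue name and creation call here are assumptions, not the patch's actual code:

	/* Sketch only -- the real implementation lives in odp.c (not shown).
	 * The workqueue name and single-threadedness are assumptions. */
	struct workqueue_struct *mlx5_ib_page_fault_wq;

	int __init mlx5_ib_odp_init(void)
	{
		/* Page-fault handlers sleep while pinning user pages, so they
		 * run from a workqueue (kthread context) rather than from the
		 * event-queue interrupt path. */
		mlx5_ib_page_fault_wq =
			create_singlethread_workqueue("mlx5_ib_page_fault_wq");

		return mlx5_ib_page_fault_wq ? 0 : -ENOMEM;
	}

	void mlx5_ib_odp_cleanup(void)
	{
		destroy_workqueue(mlx5_ib_page_fault_wq);
	}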

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 65 additions & 2 deletions
@@ -149,6 +149,29 @@ enum {
 	MLX5_QP_EMPTY
 };
 
+/*
+ * Connect-IB can trigger up to four concurrent pagefaults
+ * per-QP.
+ */
+enum mlx5_ib_pagefault_context {
+	MLX5_IB_PAGEFAULT_RESPONDER_READ,
+	MLX5_IB_PAGEFAULT_REQUESTOR_READ,
+	MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
+	MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
+	MLX5_IB_PAGEFAULT_CONTEXTS
+};
+
+static inline enum mlx5_ib_pagefault_context
+	mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
+{
+	return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
+}
+
+struct mlx5_ib_pfault {
+	struct work_struct	work;
+	struct mlx5_pagefault	mpfault;
+};
+
 struct mlx5_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx5_core_qp	mqp;
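
Note how mlx5_ib_get_pagefault_context() maps the core's fault flags straight onto the enum: this only works if the bit values of MLX5_PFAULT_REQUESTOR and MLX5_PFAULT_WRITE (defined in the mlx5 core headers, not shown in this excerpt) line up with the enum ordering. A hedged illustration of that assumed correspondence:

	/* Assumed flag values; the real definitions live in the mlx5 core
	 * headers and are not part of this excerpt. */
	enum {
		MLX5_PFAULT_REQUESTOR = 1 << 0,	/* assumption */
		MLX5_PFAULT_WRITE     = 1 << 1,	/* assumption */
	};

	/*
	 * flags & (REQUESTOR | WRITE):
	 *   0                 -> MLX5_IB_PAGEFAULT_RESPONDER_READ  (0)
	 *   REQUESTOR         -> MLX5_IB_PAGEFAULT_REQUESTOR_READ  (1)
	 *   WRITE             -> MLX5_IB_PAGEFAULT_RESPONDER_WRITE (2)
	 *   REQUESTOR | WRITE -> MLX5_IB_PAGEFAULT_REQUESTOR_WRITE (3)
	 */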
@@ -194,6 +217,21 @@ struct mlx5_ib_qp {
 
 	/* Store signature errors */
 	bool			signature_en;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * A flag that is true for QP's that are in a state that doesn't
+	 * allow page faults, and shouldn't schedule any more faults.
+	 */
+	int			disable_page_faults;
+	/*
+	 * The disable_page_faults_lock protects a QP's disable_page_faults
+	 * field, allowing for a thread to atomically check whether the QP
+	 * allows page faults, and if so schedule a page fault.
+	 */
+	spinlock_t		disable_page_faults_lock;
+	struct mlx5_ib_pfault	pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
+#endif
 };
 
 struct mlx5_ib_cq_buf {
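
The two comments in the hunk above describe a check-and-schedule protocol: an event handler takes disable_page_faults_lock, tests disable_page_faults, and only then queues the per-context work item. A hypothetical sketch of that pattern (the function name and call site are assumptions; the real handler arrives in a later patch):

	/* Hypothetical sketch of the check-and-queue protocol described by
	 * the struct comments above; not the patch's actual code. */
	static void mlx5_ib_schedule_pagefault(struct mlx5_ib_qp *qp,
					       struct mlx5_ib_pfault *pfault)
	{
		unsigned long flags;

		spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
		/* Drop the fault if the QP has left a faultable state;
		 * otherwise hand it to the page-fault workqueue so it is
		 * handled in kthread context. */
		if (!qp->disable_page_faults)
			queue_work(mlx5_ib_page_fault_wq, &pfault->work);
		spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
	}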
@@ -392,13 +430,17 @@ struct mlx5_ib_dev {
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
-	spinlock_t			mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
 	int				fill_delay;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	struct ib_odp_caps	odp_caps;
+	/*
+	 * Sleepable RCU that prevents destruction of MRs while they are still
+	 * being used by a page fault handler.
+	 */
+	struct srcu_struct	mr_srcu;
 #endif
 };
 
@@ -575,12 +617,33 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+extern struct workqueue_struct *mlx5_ib_page_fault_wq;
+
 int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev);
-#else
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault);
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int __init mlx5_ib_odp_init(void);
+void mlx5_ib_odp_cleanup(void);
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
 {
 	return 0;
 }
+
+static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline int mlx5_ib_odp_init(void) { return 0; }
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
+
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
 static inline void init_query_mad(struct ib_smp *mad)

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 31 additions & 14 deletions
@@ -52,6 +52,8 @@ static __be64 mlx5_ib_update_mtt_emergency_buffer[
 static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
 #endif
 
+static int clean_mr(struct mlx5_ib_mr *mr);
+
 static int order2idx(struct mlx5_ib_dev *dev, int order)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -1049,6 +1051,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			mlx5_ib_dbg(dev, "cache empty for order %d", order);
 			mr = NULL;
 		}
+	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
+		err = -EINVAL;
+		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+		goto error;
 	}
 
 	if (!mr)
if (!mr)
@@ -1064,9 +1070,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
10641070

10651071
mr->umem = umem;
10661072
mr->npages = npages;
1067-
spin_lock(&dev->mr_lock);
1068-
dev->mdev->priv.reg_pages += npages;
1069-
spin_unlock(&dev->mr_lock);
1073+
atomic_add(npages, &dev->mdev->priv.reg_pages);
10701074
mr->ibmr.lkey = mr->mmr.key;
10711075
mr->ibmr.rkey = mr->mmr.key;
10721076

@@ -1110,12 +1114,9 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	return err;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-	struct mlx5_ib_mr *mr = to_mmr(ibmr);
-	struct ib_umem *umem = mr->umem;
-	int npages = mr->npages;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 	int umred = mr->umred;
 	int err;
 
@@ -1135,16 +1136,32 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 		free_cached_mr(dev, mr);
 	}
 
+	if (!umred)
+		kfree(mr);
+
+	return 0;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	int npages = mr->npages;
+	struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (umem)
+		/* Wait for all running page-fault handlers to finish. */
+		synchronize_srcu(&dev->mr_srcu);
+#endif
+
+	clean_mr(mr);
+
 	if (umem) {
 		ib_umem_release(umem);
-		spin_lock(&dev->mr_lock);
-		dev->mdev->priv.reg_pages -= npages;
-		spin_unlock(&dev->mr_lock);
+		atomic_sub(npages, &dev->mdev->priv.reg_pages);
 	}
 
-	if (!umred)
-		kfree(mr);
-
 	return 0;
 }
 