Skip to content

Commit 81713d3

Browse files
Artemy-Mellanox authored
dledford committed
IB/mlx5: Add implicit MR support
Add an implicit MR covering the entire user address space. The MR is implemented as an indirect KSM MR consisting of 1GB direct MRs. Pages and direct MRs are added to / removed from the MR by ODP. Signed-off-by: Artemy Kovalyov <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]> Signed-off-by: Doug Ledford <[email protected]>
1 parent 49780d4 commit 81713d3

File tree

5 files changed

+513
-49
lines changed

5 files changed

+513
-49
lines changed

drivers/infiniband/hw/mlx5/main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3583,6 +3583,8 @@ static int __init mlx5_ib_init(void)
35833583
{
35843584
int err;
35853585

3586+
mlx5_ib_odp_init();
3587+
35863588
err = mlx5_register_interface(&mlx5_ib_interface);
35873589

35883590
return err;

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
202202
#define MLX5_IB_UPD_XLT_ADDR BIT(3)
203203
#define MLX5_IB_UPD_XLT_PD BIT(4)
204204
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
205+
#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
205206

206207
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
207208
*
@@ -503,6 +504,10 @@ struct mlx5_ib_mr {
503504
int live;
504505
void *descs_alloc;
505506
int access_flags; /* Needed for rereg MR */
507+
508+
struct mlx5_ib_mr *parent;
509+
atomic_t num_leaf_free;
510+
wait_queue_head_t q_leaf_free;
506511
};
507512

508513
struct mlx5_ib_mw {
@@ -637,6 +642,7 @@ struct mlx5_ib_dev {
637642
* being used by a page fault handler.
638643
*/
639644
struct srcu_struct mr_srcu;
645+
u32 null_mkey;
640646
#endif
641647
struct mlx5_ib_flow_db flow_db;
642648
/* protect resources needed as part of reset flow */
@@ -789,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
789795
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
790796
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
791797
int page_shift, int flags);
798+
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
799+
int access_flags);
800+
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
792801
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
793802
u64 length, u64 virt_addr, int access_flags,
794803
struct ib_pd *pd, struct ib_udata *udata);
@@ -868,16 +877,23 @@ int __init mlx5_ib_odp_init(void);
868877
void mlx5_ib_odp_cleanup(void);
869878
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
870879
unsigned long end);
880+
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
881+
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
882+
size_t nentries, struct mlx5_ib_mr *mr, int flags);
871883
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
872884
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
873885
{
874886
return;
875887
}
876888

877889
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
878-
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
890+
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
879891
static inline int mlx5_ib_odp_init(void) { return 0; }
880-
static inline void mlx5_ib_odp_cleanup(void) {}
892+
static inline void mlx5_ib_odp_cleanup(void) {}
893+
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
894+
static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
895+
size_t nentries, struct mlx5_ib_mr *mr,
896+
int flags) {}
881897

882898
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
883899

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
469469
spin_unlock_irq(&ent->lock);
470470

471471
err = add_keys(dev, entry, 1);
472-
if (err)
472+
if (err && err != -EAGAIN)
473473
return ERR_PTR(err);
474474

475475
wait_for_completion(&ent->compl);
@@ -669,8 +669,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
669669
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
670670
queue_work(cache->wq, &ent->work);
671671

672-
if (i > MAX_UMR_CACHE_ENTRY)
672+
if (i > MAX_UMR_CACHE_ENTRY) {
673+
mlx5_odp_init_mr_cache_entry(ent);
673674
continue;
675+
}
674676

675677
if (!use_umr(dev, ent->order))
676678
continue;
@@ -935,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
935937
{
936938
struct mlx5_ib_dev *dev = mr->dev;
937939
struct ib_umem *umem = mr->umem;
940+
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
941+
mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
942+
return npages;
943+
}
938944

939945
npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
940946

@@ -968,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
968974
struct mlx5_umr_wr wr;
969975
struct ib_sge sg;
970976
int err = 0;
971-
int desc_size = sizeof(struct mlx5_mtt);
977+
int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
978+
? sizeof(struct mlx5_klm)
979+
: sizeof(struct mlx5_mtt);
972980
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
973981
const int page_mask = page_align - 1;
974982
size_t pages_mapped = 0;
@@ -1186,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
11861194

11871195
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
11881196
start, virt_addr, length, access_flags);
1197+
1198+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1199+
if (!start && length == U64_MAX) {
1200+
if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1201+
!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1202+
return ERR_PTR(-EINVAL);
1203+
1204+
mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
1205+
return &mr->ibmr;
1206+
}
1207+
#endif
1208+
11891209
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
11901210
&page_shift, &ncont, &order);
11911211

@@ -1471,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
14711491
/* Wait for all running page-fault handlers to finish. */
14721492
synchronize_srcu(&dev->mr_srcu);
14731493
/* Destroy all page mappings */
1474-
mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1475-
ib_umem_end(umem));
1494+
if (umem->odp_data->page_list)
1495+
mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1496+
ib_umem_end(umem));
1497+
else
1498+
mlx5_ib_free_implicit_mr(mr);
14761499
/*
14771500
* We kill the umem before the MR for ODP,
14781501
* so that there will not be any invalidations in

0 commit comments

Comments
 (0)