Skip to content

Commit cc149f7

Browse files
haggaie authored and rolandd committed
IB/mlx5: Changes in memory region creation to support on-demand paging
This patch wraps together several changes needed for on-demand paging support
in the mlx5_ib_populate_pas function, and when registering memory regions.

* Instead of accepting a UMR bit telling the function to enable all
  access flags, the function now accepts the access flags themselves.
* For on-demand paging memory regions, fill the memory tables from the
  correct list, and enable/disable the access flags per-page according
  to whether the page is present.
* A new bit is set to enable writing of access flags when using the
  firmware create_mkey command.
* Disable contig pages when on-demand paging is enabled.

In addition the patch changes the UMR code to use PTR_ALIGN instead of
our own macro.

Signed-off-by: Haggai Eran <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>
1 parent 8cdd312 commit cc149f7

File tree

4 files changed

+88
-18
lines changed

4 files changed

+88
-18
lines changed

drivers/infiniband/hw/mlx5/mem.c

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#include <linux/module.h>
3434
#include <rdma/ib_umem.h>
35+
#include <rdma/ib_umem_odp.h>
3536
#include "mlx5_ib.h"
3637

3738
/* @umem: umem object to scan
@@ -57,6 +58,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
5758
int entry;
5859
unsigned long page_shift = ilog2(umem->page_size);
5960

61+
/* With ODP we must always match OS page size. */
62+
if (umem->odp_data) {
63+
*count = ib_umem_page_count(umem);
64+
*shift = PAGE_SHIFT;
65+
*ncont = *count;
66+
if (order)
67+
*order = ilog2(roundup_pow_of_two(*count));
68+
69+
return;
70+
}
71+
6072
addr = addr >> page_shift;
6173
tmp = (unsigned long)addr;
6274
m = find_first_bit(&tmp, sizeof(tmp));
@@ -108,8 +120,32 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
108120
*count = i;
109121
}
110122

123+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
124+
static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
125+
{
126+
u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
127+
128+
if (umem_dma & ODP_READ_ALLOWED_BIT)
129+
mtt_entry |= MLX5_IB_MTT_READ;
130+
if (umem_dma & ODP_WRITE_ALLOWED_BIT)
131+
mtt_entry |= MLX5_IB_MTT_WRITE;
132+
133+
return mtt_entry;
134+
}
135+
#endif
136+
137+
/*
138+
* Populate the given array with bus addresses from the umem.
139+
*
140+
* dev - mlx5_ib device
141+
* umem - umem to use to fill the pages
142+
* page_shift - determines the page size used in the resulting array
143+
* pas - bus addresses array to fill
144+
* access_flags - access flags to set on all present pages.
145+
use enum mlx5_ib_mtt_access_flags for this.
146+
*/
111147
void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
112-
int page_shift, __be64 *pas, int umr)
148+
int page_shift, __be64 *pas, int access_flags)
113149
{
114150
unsigned long umem_page_shift = ilog2(umem->page_size);
115151
int shift = page_shift - umem_page_shift;
@@ -120,6 +156,23 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
120156
int len;
121157
struct scatterlist *sg;
122158
int entry;
159+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
160+
const bool odp = umem->odp_data != NULL;
161+
162+
if (odp) {
163+
int num_pages = ib_umem_num_pages(umem);
164+
165+
WARN_ON(shift != 0);
166+
WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
167+
168+
for (i = 0; i < num_pages; ++i) {
169+
dma_addr_t pa = umem->odp_data->dma_list[i];
170+
171+
pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
172+
}
173+
return;
174+
}
175+
#endif
123176

124177
i = 0;
125178
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -128,8 +181,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
128181
for (k = 0; k < len; k++) {
129182
if (!(i & mask)) {
130183
cur = base + (k << umem_page_shift);
131-
if (umr)
132-
cur |= 3;
184+
cur |= access_flags;
133185

134186
pas[i >> shift] = cpu_to_be64(cur);
135187
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,13 @@ struct mlx5_ib_xrcd {
268268
u32 xrcdn;
269269
};
270270

271+
enum mlx5_ib_mtt_access_flags {
272+
MLX5_IB_MTT_READ = (1 << 0),
273+
MLX5_IB_MTT_WRITE = (1 << 1),
274+
};
275+
276+
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
277+
271278
struct mlx5_ib_mr {
272279
struct ib_mr ibmr;
273280
struct mlx5_core_mr mmr;
@@ -552,7 +559,7 @@ void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
552559
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
553560
int *ncont, int *order);
554561
void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
555-
int page_shift, __be64 *pas, int umr);
562+
int page_shift, __be64 *pas, int access_flags);
556563
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
557564
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
558565
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -588,4 +595,7 @@ static inline u8 convert_access(int acc)
588595
MLX5_PERM_LOCAL_READ;
589596
}
590597

598+
#define MLX5_MAX_UMR_SHIFT 16
599+
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
600+
591601
#endif /* MLX5_IB_H */

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,6 @@ enum {
4848
MLX5_UMR_ALIGN = 2048
4949
};
5050

51-
static __be64 *mr_align(__be64 *ptr, int align)
52-
{
53-
unsigned long mask = align - 1;
54-
55-
return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
56-
}
57-
5851
static int order2idx(struct mlx5_ib_dev *dev, int order)
5952
{
6053
struct mlx5_mr_cache *cache = &dev->cache;
@@ -669,7 +662,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
669662

670663
static int use_umr(int order)
671664
{
672-
return order <= 17;
665+
return order <= MLX5_MAX_UMR_SHIFT;
673666
}
674667

675668
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@@ -747,8 +740,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
747740
struct ib_send_wr wr, *bad;
748741
struct mlx5_ib_mr *mr;
749742
struct ib_sge sg;
750-
int size = sizeof(u64) * npages;
743+
int size;
751744
__be64 *mr_pas;
745+
__be64 *pas;
752746
dma_addr_t dma;
753747
int err = 0;
754748
int i;
@@ -768,17 +762,22 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
768762
if (!mr)
769763
return ERR_PTR(-EAGAIN);
770764

765+
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
766+
* To avoid copying garbage after the pas array, we allocate
767+
* a little more. */
768+
size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
771769
mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
772770
if (!mr_pas) {
773771
err = -ENOMEM;
774772
goto free_mr;
775773
}
776774

777-
mlx5_ib_populate_pas(dev, umem, page_shift,
778-
mr_align(mr_pas, MLX5_UMR_ALIGN), 1);
775+
pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
776+
mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
777+
/* Clear padding after the actual pages. */
778+
memset(pas + npages, 0, size - npages * sizeof(u64));
779779

780-
dma = dma_map_single(ddev, mr_align(mr_pas, MLX5_UMR_ALIGN), size,
781-
DMA_TO_DEVICE);
780+
dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
782781
if (dma_mapping_error(ddev, dma)) {
783782
err = -ENOMEM;
784783
goto free_pas;
@@ -833,6 +832,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
833832
struct mlx5_ib_mr *mr;
834833
int inlen;
835834
int err;
835+
bool pg_cap = !!(dev->mdev->caps.gen.flags &
836+
MLX5_DEV_CAP_FLAG_ON_DMND_PG);
836837

837838
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
838839
if (!mr)
@@ -844,8 +845,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
844845
err = -ENOMEM;
845846
goto err_1;
846847
}
847-
mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
848+
mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
849+
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
848850

851+
/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
852+
* in the page list submitted with the command. */
853+
in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
849854
in->seg.flags = convert_access(access_flags) |
850855
MLX5_ACCESS_MODE_MTT;
851856
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);

include/linux/mlx5/device.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,9 @@ enum {
198198
MLX5_UMR_INLINE = (1 << 7),
199199
};
200200

201+
#define MLX5_UMR_MTT_ALIGNMENT 0x40
202+
#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
203+
201204
enum mlx5_event {
202205
MLX5_EVENT_TYPE_COMP = 0x0,
203206

0 commit comments

Comments (0)