Commit 832a6b0

haggaie authored and rolandd committed
IB/mlx5: Add mlx5_ib_update_mtt to update page tables after creation
The new function allows updating the page tables of a memory region after it was created. This can be used to handle page faults and page invalidations.

Since mlx5_ib_update_mtt will need to work from within page invalidation, it must not block on memory allocation. It employs an atomic memory allocation mechanism that is used as a fallback when the GFP_ATOMIC page allocation fails.

In order to reuse code from mlx5_ib_populate_pas, the patch splits this function and adds the needed parameters.

Signed-off-by: Haggai Eran <[email protected]>
Signed-off-by: Shachar Raindel <[email protected]>
Signed-off-by: Roland Dreier <[email protected]>
1 parent cc149f7 commit 832a6b0
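
The fallback scheme described in the message can be distilled into a few lines. The sketch below is not part of the patch, and the names (emergency_buf, emergency_buf_mutex, get_update_buf, put_update_buf) are hypothetical; they stand in for the mlx5_ib_update_mtt_emergency_buffer machinery actually added in mr.c further down:

#include <linux/gfp.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/types.h>

/* Illustrative sketch of the allocation-fallback pattern (hypothetical
 * helper names, not the patch's code). In a context that must not block
 * waiting for memory, a failed GFP_ATOMIC page allocation falls back to
 * a static buffer whose users are serialized by a mutex.
 */
static u64 emergency_buf[8];	/* tiny here; the patch sizes its buffer
				 * by MLX5_UMR_MTT_MIN_CHUNK_SIZE */
static DEFINE_MUTEX(emergency_buf_mutex);

static u64 *get_update_buf(bool *used_emergency)
{
	u64 *buf = (u64 *)get_zeroed_page(GFP_ATOMIC);

	if (buf) {
		*used_emergency = false;
		return buf;
	}
	/* Slow path: one shared buffer, callers take turns. */
	mutex_lock(&emergency_buf_mutex);
	memset(emergency_buf, 0, sizeof(emergency_buf));
	*used_emergency = true;
	return emergency_buf;
}

static void put_update_buf(u64 *buf, bool used_emergency)
{
	if (used_emergency)
		mutex_unlock(&emergency_buf_mutex);
	else
		free_page((unsigned long)buf);
}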

File tree

4 files changed: +149 -8 lines changed

drivers/infiniband/hw/mlx5/mem.c

Lines changed: 14 additions & 5 deletions
@@ -140,12 +140,16 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
  * dev - mlx5_ib device
  * umem - umem to use to fill the pages
  * page_shift - determines the page size used in the resulting array
+ * offset - offset into the umem to start from,
+ *          only implemented for ODP umems
+ * num_pages - total number of pages to fill
  * pas - bus addresses array to fill
  * access_flags - access flags to set on all present pages.
  *		use enum mlx5_ib_mtt_access_flags for this.
  */
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int access_flags)
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags)
 {
 	unsigned long umem_page_shift = ilog2(umem->page_size);
 	int shift = page_shift - umem_page_shift;
@@ -160,13 +164,11 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	const bool odp = umem->odp_data != NULL;
 
 	if (odp) {
-		int num_pages = ib_umem_num_pages(umem);
-
 		WARN_ON(shift != 0);
 		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
 
 		for (i = 0; i < num_pages; ++i) {
-			dma_addr_t pa = umem->odp_data->dma_list[i];
+			dma_addr_t pa = umem->odp_data->dma_list[offset + i];
 
 			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
 		}
@@ -194,6 +196,13 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	}
 }
 
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			  int page_shift, __be64 *pas, int access_flags)
+{
+	return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
+				      ib_umem_num_pages(umem), pas,
+				      access_flags);
+}
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
 	u64 page_size;
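
With this split, existing callers keep the old entry point while ODP code can fill translations for an arbitrary window of the umem. A hedged usage sketch follows; the variables dev, umem, and pas are assumed to be set up elsewhere, and the page indices are invented for illustration:

/* Hypothetical caller: fill MTT entries for pages [16, 24) of an ODP
 * umem. offset and num_pages are counted in (1 << page_shift) units.
 */
__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT, 16, 8, pas,
		       MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE);

/* The wrapper preserves legacy behavior: offset 0, all pages. */
mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT, pas,
		     MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE);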

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 5 additions & 0 deletions
@@ -527,6 +527,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
+		       int npages, int zap);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
@@ -558,6 +560,9 @@ int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 			int *ncont, int *order);
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags);
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			  int page_shift, __be64 *pas, int access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 129 additions & 3 deletions
@@ -44,9 +44,13 @@ enum {
 	MAX_PENDING_REG_MR = 8,
 };
 
-enum {
-	MLX5_UMR_ALIGN = 2048
-};
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+	__aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
 
 static int order2idx(struct mlx5_ib_dev *dev, int order)
 {
@@ -822,6 +826,128 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	return mr;
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+		       int zap)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct umr_common *umrc = &dev->umrc;
+	struct mlx5_ib_umr_context umr_context;
+	struct ib_umem *umem = mr->umem;
+	int size;
+	__be64 *pas;
+	dma_addr_t dma;
+	struct ib_send_wr wr, *bad;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+	struct ib_sge sg;
+	int err = 0;
+	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+	const int page_index_mask = page_index_alignment - 1;
+	size_t pages_mapped = 0;
+	size_t pages_to_map = 0;
+	size_t pages_iter = 0;
+	int use_emergency_buf = 0;
+
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+	 * so we need to align the offset and length accordingly */
+	if (start_page_index & page_index_mask) {
+		npages += start_page_index & page_index_mask;
+		start_page_index &= ~page_index_mask;
+	}
+
+	pages_to_map = ALIGN(npages, page_index_alignment);
+
+	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+		return -EINVAL;
+
+	size = sizeof(u64) * pages_to_map;
+	size = min_t(int, PAGE_SIZE, size);
+	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
+	 * code, when we are called from an invalidation. The pas buffer must
+	 * be 2k-aligned for Connect-IB. */
+	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+	if (!pas) {
+		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+		pas = mlx5_ib_update_mtt_emergency_buffer;
+		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+		use_emergency_buf = 1;
+		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+		memset(pas, 0, size);
+	}
+	pages_iter = size / sizeof(u64);
+	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
+		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+		err = -ENOMEM;
+		goto free_pas;
+	}
+
+	for (pages_mapped = 0;
+	     pages_mapped < pages_to_map && !err;
+	     pages_mapped += pages_iter, start_page_index += pages_iter) {
+		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+		npages = min_t(size_t,
+			       pages_iter,
+			       ib_umem_num_pages(umem) - start_page_index);
+
+		if (!zap) {
+			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+					       start_page_index, npages, pas,
+					       MLX5_IB_MTT_PRESENT);
+			/* Clear padding after the pages brought from the
+			 * umem. */
+			memset(pas + npages, 0, size - npages * sizeof(u64));
+		}
+
+		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+		memset(&wr, 0, sizeof(wr));
+		wr.wr_id = (u64)(unsigned long)&umr_context;
+
+		sg.addr = dma;
+		sg.length = ALIGN(npages * sizeof(u64),
+				  MLX5_UMR_MTT_ALIGNMENT);
+		sg.lkey = dev->umrc.mr->lkey;
+
+		wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+				MLX5_IB_SEND_UMR_UPDATE_MTT;
+		wr.sg_list = &sg;
+		wr.num_sge = 1;
+		wr.opcode = MLX5_IB_WR_UMR;
+		umrwr->npages = sg.length / sizeof(u64);
+		umrwr->page_shift = PAGE_SHIFT;
+		umrwr->mkey = mr->mmr.key;
+		umrwr->target.offset = start_page_index;
+
+		mlx5_ib_init_umr_context(&umr_context);
+		down(&umrc->sem);
+		err = ib_post_send(umrc->qp, &wr, &bad);
+		if (err) {
+			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+		} else {
+			wait_for_completion(&umr_context.done);
+			if (umr_context.status != IB_WC_SUCCESS) {
+				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+					    umr_context.status);
+				err = -EFAULT;
+			}
+		}
+		up(&umrc->sem);
+	}
+	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+	if (!use_emergency_buf)
+		free_page((unsigned long)pas);
+	else
+		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+	return err;
+}
+#endif
+
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 				     u64 length, struct ib_umem *umem,
 				     int npages, int page_shift,
include/linux/mlx5/device.h

Lines changed: 1 addition & 0 deletions
@@ -200,6 +200,7 @@ enum {
 
 #define MLX5_UMR_MTT_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
 
 enum mlx5_event {
 	MLX5_EVENT_TYPE_COMP		= 0x0,
