
Commit 064e526

Idan Burstein authored and dledford committed
IB/mlx5: posting klm/mtt list inline in the send queue for reg_wr
Most kernel RDMA ULPs (e.g. NVMe over Fabrics in its default "register_always=Y" mode) register and invalidate the user buffer upon each IO.

Today the mlx5 driver posts the registration work request using a scatter/gather entry for the MTT/KLM list. The fetch of the MTT/KLM list becomes the bottleneck in the number of IO operations the NVMe over Fabrics host driver can issue on a single adapter, as shown below.

This patch adds support for posting the registration work request with the MTT/KLM list inline when the list is <= 64B in size. For NVMe over Fabrics the result is a greater than 3.5x increase for small IOs, as shown below; I expect other ULPs (e.g. iSER, SRP, NFS over RDMA) to see improved performance as well.

The following results were taken against a single NVMe-oF (RoCE link layer) subsystem with a single namespace backed by null_blk, using the fio benchmark (rw=randread, numjobs=48, iodepth={16,64}, ioengine=libaio, direct=1):

ConnectX-5 (PCIe width x16)
---------------------------
Block Size   s/g reg_wr         inline reg_wr
++++++++++   +++++++++++++++    ++++++++++++++++
512B         1302.8K/34.82%     4951.9K/99.02%
1KB          1284.3K/33.86%     4232.7K/98.09%
2KB          1238.6K/34.1%      2797.5K/80.04%
4KB          1169.3K/32.46%     1941.3K/61.35%
8KB          1013.4K/30.08%     1236.6K/39.47%
16KB         695.7K/20.19%      696.9K/20.59%
32KB         350.3K/9.64%       350.6K/10.3%
64KB         175.86K/5.27%      175.9K/5.28%

ConnectX-4 (PCIe width x8)
---------------------------
Block Size   s/g reg_wr         inline reg_wr
++++++++++   +++++++++++++++    ++++++++++++++++
512B         1285.8K/42.66%     4242.7K/98.18%
1KB          1254.1K/41.74%     3569.2K/96.00%
2KB          1185.9K/39.83%     2173.9K/75.58%
4KB          1069.4K/36.46%     1343.3K/47.47%
8KB          755.1K/27.77%      748.7K/29.14%

Tested-by: Nitzan Carmi <[email protected]>
Signed-off-by: Idan Burstein <[email protected]>
Signed-off-by: Max Gurtovoy <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
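For context, a minimal illustrative sketch (not part of this commit; error handling and the matching local invalidate are omitted, and the helper name example_post_reg_mr is made up) of the per-IO fast registration a ULP such as the NVMe over Fabrics host performs. Every such IB_WR_REG_MR post ends up in mlx5's set_reg_wr(), shown in the diff below, which is where the MTT/KLM list is now posted inline:

#include <rdma/ib_verbs.h>

/*
 * Sketch of the ULP-side per-IO registration path: map the scatterlist
 * onto the MR's descriptor (MTT/KLM) list, then post a REG_MR work
 * request; the provider driver builds the actual registration WQE.
 */
static int example_post_reg_mr(struct ib_qp *qp, struct ib_mr *mr,
                               struct scatterlist *sg, int sg_nents)
{
        struct ib_reg_wr reg_wr = {};
        struct ib_send_wr *bad_wr;
        int n;

        /* Populate the MR's page/descriptor list from the scatterlist. */
        n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
        if (n < sg_nents)
                return n < 0 ? n : -EINVAL;

        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;
        reg_wr.mr = mr;
        reg_wr.key = mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE |
                        IB_ACCESS_REMOTE_READ |
                        IB_ACCESS_REMOTE_WRITE;

        /* mlx5_ib turns this into the UMR WQE built in set_reg_wr(). */
        return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}

With register_always=Y the host performs this registration (and the matching invalidate) for every IO, which is why the rate at which registration WQEs can be issued dominates small-IO IOPS.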
1 parent ed3dd9b commit 064e526

File tree

1 file changed: +36 −7 lines changed

  • drivers/infiniband/hw/mlx5/qp.c


drivers/infiniband/hw/mlx5/qp.c

Lines changed: 36 additions & 7 deletions
@@ -54,6 +54,7 @@ enum {
 
 enum {
         MLX5_IB_SQ_STRIDE               = 6,
+        MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64,
 };
 
 static const u32 mlx5_ib_opcode[] = {
@@ -298,7 +299,9 @@ static int sq_overhead(struct ib_qp_init_attr *attr)
                         max(sizeof(struct mlx5_wqe_atomic_seg) +
                             sizeof(struct mlx5_wqe_raddr_seg),
                             sizeof(struct mlx5_wqe_umr_ctrl_seg) +
-                            sizeof(struct mlx5_mkey_seg));
+                            sizeof(struct mlx5_mkey_seg) +
+                            MLX5_IB_SQ_UMR_INLINE_THRESHOLD /
+                            MLX5_IB_UMR_OCTOWORD);
                 break;
 
         case IB_QPT_XRC_TGT:
@@ -3633,13 +3636,15 @@ static __be64 sig_mkey_mask(void)
 }
 
 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
-                            struct mlx5_ib_mr *mr)
+                            struct mlx5_ib_mr *mr, bool umr_inline)
 {
         int size = mr->ndescs * mr->desc_size;
 
         memset(umr, 0, sizeof(*umr));
 
         umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+        if (umr_inline)
+                umr->flags |= MLX5_UMR_INLINE;
         umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
         umr->mkey_mask = frwr_mkey_mask();
 }
@@ -3823,6 +3828,24 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
         dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
 }
 
+static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
+                                   struct mlx5_ib_mr *mr, int mr_list_size)
+{
+        void *qend = qp->sq.qend;
+        void *addr = mr->descs;
+        int copy;
+
+        if (unlikely(seg + mr_list_size > qend)) {
+                copy = qend - seg;
+                memcpy(seg, addr, copy);
+                addr += copy;
+                mr_list_size -= copy;
+                seg = mlx5_get_send_wqe(qp, 0);
+        }
+        memcpy(seg, addr, mr_list_size);
+        seg += mr_list_size;
+}
+
 static __be32 send_ieth(struct ib_send_wr *wr)
 {
         switch (wr->opcode) {
@@ -4217,14 +4240,16 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
 {
         struct mlx5_ib_mr *mr = to_mmr(wr->mr);
         struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
+        int mr_list_size = mr->ndescs * mr->desc_size;
+        bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
 
         if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
                 mlx5_ib_warn(to_mdev(qp->ibqp.device),
                              "Invalid IB_SEND_INLINE send flag\n");
                 return -EINVAL;
         }
 
-        set_reg_umr_seg(*seg, mr);
+        set_reg_umr_seg(*seg, mr, umr_inline);
         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
         if (unlikely((*seg == qp->sq.qend)))
@@ -4236,10 +4261,14 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
         if (unlikely((*seg == qp->sq.qend)))
                 *seg = mlx5_get_send_wqe(qp, 0);
 
-        set_reg_data_seg(*seg, mr, pd);
-        *seg += sizeof(struct mlx5_wqe_data_seg);
-        *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
-
+        if (umr_inline) {
+                set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size);
+                *size += get_xlt_octo(mr_list_size);
+        } else {
+                set_reg_data_seg(*seg, mr, pd);
+                *seg += sizeof(struct mlx5_wqe_data_seg);
+                *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+        }
         return 0;
 }
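A rough aid to reading the 64-byte threshold (a back-of-the-envelope sketch; the 8-byte MTT and 16-byte KLM descriptor sizes are assumptions about mlx5, not stated in this diff): 64 bytes of inline payload is four 16-byte octowords, enough for up to 8 MTT or 4 KLM descriptors per registration WQE.

#include <stdbool.h>

/* Hypothetical helper, not in the patch: mirrors the umr_inline decision
 * made in set_reg_wr(). Descriptor sizes assumed: 8B per MTT, 16B per KLM. */
#define SQ_UMR_INLINE_THRESHOLD 64      /* MLX5_IB_SQ_UMR_INLINE_THRESHOLD */
#define UMR_OCTOWORD            16      /* MLX5_IB_UMR_OCTOWORD */

static bool fits_inline(int ndescs, int desc_size)
{
        /* Same test as: mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD */
        return ndescs * desc_size <= SQ_UMR_INLINE_THRESHOLD;
}

/* fits_inline(8, 8)  == true  (8 MTT entries = 64B = 4 octowords)
 * fits_inline(4, 16) == true  (4 KLM entries)
 * fits_inline(9, 8)  == false (falls back to the scatter/gather path) */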
