Skip to content

Commit 54d6638

Browse files
wangxi11 authored and jgunthorpe committed
RDMA/hns: Optimize WQE buffer size calculating process
Optimize the process of calculating the QP's WQE buffer parameters to make the code more readable, mainly by merging the calculation of the extended SGE space for kernel and userspace. In addition, add some inline functions to simplify the code for multi-hop addressing. Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Xi Wang <[email protected]> Signed-off-by: Weihang Li <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 2929c40 commit 54d6638

File tree

3 files changed

+182
-265
lines changed

3 files changed

+182
-265
lines changed

drivers/infiniband/hw/hns/hns_roce_device.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,6 +1079,8 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx)
10791079
return buf->page_list[idx].map;
10801080
}
10811081

1082+
/* Align x to a multiple of (1 << PAGE_ADDR_SHIFT) via ALIGN(). */
#define hr_hw_page_align(x) ALIGN(x, 1 << PAGE_ADDR_SHIFT)
1083+
10821084
static inline u64 to_hr_hw_page_addr(u64 addr)
10831085
{
10841086
return addr >> PAGE_ADDR_SHIFT;
@@ -1089,6 +1091,29 @@ static inline u32 to_hr_hw_page_shift(u32 page_shift)
10891091
return page_shift - PAGE_ADDR_SHIFT;
10901092
}
10911093

1094+
/*
 * Pick the hop number for a region with @count entries.
 *
 * Returns 0 when the region is empty (@count == 0) or when @hopnum is
 * HNS_ROCE_HOP_NUM_0; otherwise returns @hopnum unchanged.
 */
static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count)
{
	if (count == 0 || hopnum == HNS_ROCE_HOP_NUM_0)
		return 0;

	return hopnum;
}
1101+
1102+
/*
 * Size in bytes of @count entries of (1 << @buf_shift) bytes each,
 * aligned with hr_hw_page_align().
 */
static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift)
{
	u32 bytes = count << buf_shift;

	return hr_hw_page_align(bytes);
}
1106+
1107+
/*
 * Number of (1 << @buf_shift)-byte entries that fit in the space obtained
 * by aligning @count entries with hr_hw_page_align().
 */
static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift)
{
	u32 aligned_bytes = hr_hw_page_align(count << buf_shift);

	return aligned_bytes >> buf_shift;
}
1111+
1112+
/*
 * log2 of the aligned entry count returned by to_hr_hem_entries_count().
 *
 * Returns 0 for an empty region (@count == 0): the aligned entry count is
 * then 0 as well, and ilog2(0) is undefined, which would otherwise hand an
 * invalid shift value to the caller.
 */
static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
{
	if (!count)
		return 0;

	return ilog2(to_hr_hem_entries_count(count, buf_shift));
}
1116+
10921117
int hns_roce_init_uar_table(struct hns_roce_dev *dev);
10931118
int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
10941119
void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar);

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 36 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -154,47 +154,24 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
154154
unsigned int *sge_ind, int valid_num_sge)
155155
{
156156
struct hns_roce_v2_wqe_data_seg *dseg;
157-
struct ib_sge *sg;
158-
int num_in_wqe = 0;
159-
int extend_sge_num;
160-
int fi_sge_num;
161-
int se_sge_num;
162-
int shift;
163-
int i;
157+
struct ib_sge *sge = wr->sg_list;
158+
unsigned int idx = *sge_ind;
159+
int cnt = valid_num_sge;
164160

165-
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
166-
num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
167-
extend_sge_num = valid_num_sge - num_in_wqe;
168-
sg = wr->sg_list + num_in_wqe;
169-
shift = qp->mtr.hem_cfg.buf_pg_shift;
161+
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
162+
cnt -= HNS_ROCE_SGE_IN_WQE;
163+
sge += HNS_ROCE_SGE_IN_WQE;
164+
}
170165

171-
/*
172-
* Check whether wr->num_sge sges are in the same page. If not, we
173-
* should calculate how many sges in the first page and the second
174-
* page.
175-
*/
176-
dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
177-
fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
178-
(uintptr_t)dseg) /
179-
sizeof(struct hns_roce_v2_wqe_data_seg);
180-
if (extend_sge_num > fi_sge_num) {
181-
se_sge_num = extend_sge_num - fi_sge_num;
182-
for (i = 0; i < fi_sge_num; i++) {
183-
set_data_seg_v2(dseg++, sg + i);
184-
(*sge_ind)++;
185-
}
186-
dseg = hns_roce_get_extend_sge(qp,
187-
(*sge_ind) & (qp->sge.sge_cnt - 1));
188-
for (i = 0; i < se_sge_num; i++) {
189-
set_data_seg_v2(dseg++, sg + fi_sge_num + i);
190-
(*sge_ind)++;
191-
}
192-
} else {
193-
for (i = 0; i < extend_sge_num; i++) {
194-
set_data_seg_v2(dseg++, sg + i);
195-
(*sge_ind)++;
196-
}
166+
while (cnt > 0) {
167+
dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
168+
set_data_seg_v2(dseg, sge);
169+
idx++;
170+
sge++;
171+
cnt--;
197172
}
173+
174+
*sge_ind = idx;
198175
}
199176

200177
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
@@ -232,7 +209,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
232209
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
233210
1);
234211
} else {
235-
if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
212+
if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
236213
for (i = 0; i < wr->num_sge; i++) {
237214
if (likely(wr->sg_list[i].length)) {
238215
set_data_seg_v2(dseg, wr->sg_list + i);
@@ -245,8 +222,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
245222
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
246223
(*sge_ind) & (qp->sge.sge_cnt - 1));
247224

248-
for (i = 0; i < wr->num_sge &&
249-
j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
225+
for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE;
226+
i++) {
250227
if (likely(wr->sg_list[i].length)) {
251228
set_data_seg_v2(dseg, wr->sg_list + i);
252229
dseg++;
@@ -675,7 +652,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
675652
}
676653

677654
/* rq support inline data */
678-
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
655+
if (hr_qp->rq_inl_buf.wqe_cnt) {
679656
sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
680657
hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
681658
(u32)wr->num_sge;
@@ -3491,29 +3468,18 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
34913468
struct hns_roce_v2_qp_context *context,
34923469
struct hns_roce_v2_qp_context *qpc_mask)
34933470
{
3494-
if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
3495-
roce_set_field(context->byte_4_sqpn_tst,
3496-
V2_QPC_BYTE_4_SGE_SHIFT_M,
3497-
V2_QPC_BYTE_4_SGE_SHIFT_S,
3498-
ilog2((unsigned int)hr_qp->sge.sge_cnt));
3499-
else
3500-
roce_set_field(context->byte_4_sqpn_tst,
3501-
V2_QPC_BYTE_4_SGE_SHIFT_M,
3502-
V2_QPC_BYTE_4_SGE_SHIFT_S,
3503-
hr_qp->sq.max_gs >
3504-
HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ?
3505-
ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
3471+
roce_set_field(context->byte_4_sqpn_tst,
3472+
V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S,
3473+
to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
3474+
hr_qp->sge.sge_shift));
35063475

35073476
roce_set_field(context->byte_20_smac_sgid_idx,
35083477
V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
3509-
ilog2((unsigned int)hr_qp->sq.wqe_cnt));
3478+
ilog2(hr_qp->sq.wqe_cnt));
35103479

35113480
roce_set_field(context->byte_20_smac_sgid_idx,
35123481
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
3513-
(hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
3514-
hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT ||
3515-
hr_qp->ibqp.srq) ? 0 :
3516-
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
3482+
ilog2(hr_qp->rq.wqe_cnt));
35173483
}
35183484

35193485
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
@@ -3781,26 +3747,26 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
37813747

37823748
roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
37833749
V2_QPC_BYTE_12_SQ_HOP_NUM_S,
3784-
hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ?
3785-
0 : hr_dev->caps.wqe_sq_hop_num);
3750+
to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num,
3751+
hr_qp->sq.wqe_cnt));
37863752
roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
37873753
V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0);
37883754

37893755
roce_set_field(context->byte_20_smac_sgid_idx,
37903756
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
37913757
V2_QPC_BYTE_20_SGE_HOP_NUM_S,
3792-
((ibqp->qp_type == IB_QPT_GSI) ||
3793-
hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
3794-
hr_dev->caps.wqe_sge_hop_num : 0);
3758+
to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num,
3759+
hr_qp->sge.sge_cnt));
37953760
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
37963761
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
37973762
V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0);
37983763

37993764
roce_set_field(context->byte_20_smac_sgid_idx,
38003765
V2_QPC_BYTE_20_RQ_HOP_NUM_M,
38013766
V2_QPC_BYTE_20_RQ_HOP_NUM_S,
3802-
hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ?
3803-
0 : hr_dev->caps.wqe_rq_hop_num);
3767+
to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num,
3768+
hr_qp->rq.wqe_cnt));
3769+
38043770
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
38053771
V2_QPC_BYTE_20_RQ_HOP_NUM_M,
38063772
V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0);
@@ -3977,7 +3943,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
39773943
return -EINVAL;
39783944
}
39793945

3980-
if (hr_qp->sge.offset) {
3946+
if (hr_qp->sge.sge_cnt > 0) {
39813947
page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
39823948
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
39833949
hr_qp->sge.offset / page_size,
@@ -4011,15 +3977,12 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
40113977
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
40123978
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0);
40133979

4014-
context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) ||
4015-
hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
4016-
cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)) : 0;
3980+
context->sq_cur_sge_blk_addr =
3981+
cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk));
40173982
roce_set_field(context->byte_184_irrl_idx,
40183983
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
40193984
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
4020-
((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs >
4021-
HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ?
4022-
upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)) : 0);
3985+
upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
40233986
qpc_mask->sq_cur_sge_blk_addr = 0;
40243987
roce_set_field(qpc_mask->byte_184_irrl_idx,
40253988
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,

0 commit comments

Comments
 (0)