
Commit d9d9f15

Tariq Toukan authored and davem330 committed
net/mlx5e: Expand WQE stride when CQE compression is enabled
Make the MPWQE/Striding RQ default configuration dynamic rather than statically set at compile time. At driver load we now set the stride size and the number of strides dynamically. By default we use the same values as before, but when CQE compression is enabled we set a larger stride size, so that larger packets also benefit from CQE compression.

Signed-off-by: Tariq Toukan <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 7219ab3

3 files changed: +46 -29 lines
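For context, the RX geometry implied by the new defaults can be computed outside the driver. Below is a minimal userspace sketch: the constants are taken from the en.h hunk in this commit, while the helper name and printout are ours, not part of the driver.

#include <stdio.h>

/* Constants from the en.h hunk below. */
#define MLX5_MPWRQ_LOG_STRIDE_SIZE              6  /* >= 6, HW restriction */
#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8  /* >= 6, HW restriction */
#define MLX5_MPWRQ_LOG_WQE_SZ                   17

/* Hypothetical helper mirroring the default selection in
 * mlx5e_build_netdev_priv(): pick the stride size from the
 * CQE-compression setting, then derive the stride count so the
 * WQE size stays fixed at 2^MLX5_MPWRQ_LOG_WQE_SZ bytes. */
static void mpwqe_geometry(int rx_cqe_compress,
                           unsigned int *stride_sz, unsigned int *num_strides)
{
        unsigned int log_stride_sz = rx_cqe_compress ?
                MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
                MLX5_MPWRQ_LOG_STRIDE_SIZE;

        *stride_sz   = 1U << log_stride_sz;
        *num_strides = 1U << (MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz);
}

int main(void)
{
        unsigned int sz, n;
        int compress;

        for (compress = 0; compress <= 1; compress++) {
                mpwqe_geometry(compress, &sz, &n);
                printf("cqe_compress=%d: %u strides x %uB = %uKB per WQE\n",
                       compress, n, sz, sz * n / 1024);
        }
        return 0;
}

With compression off this reproduces the old static layout (2048 strides of 64B); with compression on, strides grow to 256B and the count drops to 512, so the 128KB WQE size is unchanged.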

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 7 additions & 6 deletions
@@ -64,12 +64,9 @@
 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6
 
-#define MLX5_MPWRQ_LOG_NUM_STRIDES 11 /* >= 9, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */
-#define MLX5_MPWRQ_NUM_STRIDES BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
-#define MLX5_MPWRQ_STRIDE_SIZE BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
-#define MLX5_MPWRQ_LOG_WQE_SZ (MLX5_MPWRQ_LOG_NUM_STRIDES + \
-                               MLX5_MPWRQ_LOG_STRIDE_SIZE)
+#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */
+#define MLX5_MPWRQ_LOG_WQE_SZ 17
 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
                                    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)

@@ -154,6 +151,8 @@ struct mlx5e_umr_wqe {
 struct mlx5e_params {
         u8 log_sq_size;
         u8 rq_wq_type;
+        u8 mpwqe_log_stride_sz;
+        u8 mpwqe_log_num_strides;
         u8 log_rq_size;
         u16 num_channels;
         u8 num_tc;

@@ -249,6 +248,8 @@ struct mlx5e_rq {
         /* control */
         struct mlx5_wq_ctrl wq_ctrl;
         u8 wq_type;
+        u32 mpwqe_stride_sz;
+        u32 mpwqe_num_strides;
         u32 rqn;
         struct mlx5e_channel *channel;
         struct mlx5e_priv *priv;

@@ -272,7 +273,7 @@ struct mlx5e_mpw_info {
         void (*dma_pre_sync)(struct device *pdev,
                              struct mlx5e_mpw_info *wi,
                              u32 wqe_offset, u32 len);
-        void (*add_skb_frag)(struct device *pdev,
+        void (*add_skb_frag)(struct mlx5e_rq *rq,
                              struct sk_buff *skb,
                              struct mlx5e_mpw_info *wi,
                              u32 page_idx, u32 frag_offset, u32 len);

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 19 additions & 4 deletions
@@ -307,7 +307,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                 rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
                 rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
 
-                rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
+                rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
+                rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
+                rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
                 byte_count = rq->wqe_sz;
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */

@@ -1130,9 +1132,9 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
         switch (priv->params.rq_wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                 MLX5_SET(wq, wq, log_wqe_num_of_strides,
-                         MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
+                         priv->params.mpwqe_log_num_strides - 9);
                 MLX5_SET(wq, wq, log_wqe_stride_size,
-                         MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
+                         priv->params.mpwqe_log_stride_sz - 6);
                 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
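Note the biases in the two MLX5_SET() calls: per the ">= 9" and ">= 6" HW-restriction comments in en.h, the device fields are presumably programmed as offsets from those minimums. A sketch of the assumed encoding (helper names are ours, not the driver's):

/* Assumed field encoding: offsets from the HW minimums noted in en.h. */
#define MLX5_MIN_LOG_NUM_STRIDES 9
#define MLX5_MIN_LOG_STRIDE_SZ   6

static unsigned int encode_log_num_strides(unsigned int log_num_strides)
{
        return log_num_strides - MLX5_MIN_LOG_NUM_STRIDES; /* 11 -> 2, 9 -> 0 */
}

static unsigned int encode_log_stride_size(unsigned int log_stride_sz)
{
        return log_stride_sz - MLX5_MIN_LOG_STRIDE_SZ;     /* 6 -> 0, 8 -> 2 */
}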
@@ -1199,7 +1201,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
         switch (priv->params.rq_wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                 log_cq_size = priv->params.log_rq_size +
-                        MLX5_MPWRQ_LOG_NUM_STRIDES;
+                        priv->params.mpwqe_log_num_strides;
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
                 log_cq_size = priv->params.log_rq_size;
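Since the hardware may emit up to one CQE per consumed stride, the CQ is sized for that worst case. Worked numbers under this patch's MPW defaults (log_rq_size = 0x4, i.e. 16 WQEs; a sketch, not driver code):

/*   compression off: log_cq_size = 4 + 11 = 15 -> 32768 CQEs
 *   compression on:  log_cq_size = 4 +  9 = 13 ->  8192 CQEs
 * The larger strides under CQE compression thus also shrink the CQ by 4x. */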
@@ -2729,12 +2731,25 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
         switch (priv->params.rq_wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                 priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+                priv->params.mpwqe_log_stride_sz =
+                        priv->params.rx_cqe_compress ?
+                        MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+                        MLX5_MPWRQ_LOG_STRIDE_SIZE;
+                priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+                        priv->params.mpwqe_log_stride_sz;
                 priv->params.lro_en = true;
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
                 priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
         }
 
+        mlx5_core_info(mdev,
+                       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+                       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+                       BIT(priv->params.log_rq_size),
+                       BIT(priv->params.mpwqe_log_stride_sz),
+                       priv->params.rx_cqe_compress_admin);
+
         priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
                                                     BIT(priv->params.log_rq_size));
         priv->params.rx_cq_moderation_usec =
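Based on the format string above, and assuming striding RQ is supported and CQE compression is left off, one would expect a probe-time line roughly like the following (constructed from the defaults in this patch, not captured from a real run):

MLX5E: StrdRq(1) RqSz(16) StrdSz(64) RxCqeCmprss(0)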

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 20 additions & 19 deletions
@@ -127,7 +127,7 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
 
         for (i = update_owner_only; i < cqe_count;
              i++, cq->mini_arr_idx++, cqcc++) {
-                if (unlikely(cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE))
+                if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
                         mlx5e_read_mini_arr_slot(cq, cqcc);
 
                 mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);

@@ -212,6 +212,11 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
         return -ENOMEM;
 }
 
+static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
+{
+        return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
+}
+
 static inline void
 mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
                                 struct mlx5e_mpw_info *wi,
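The shift in the new helper works because a WQE spans a power-of-two number of pages. Worked numbers for 4KB pages (a sketch; the values follow from the en.h constants above):

/* PAGE_SHIFT = 12, MLX5_MPWRQ_LOG_WQE_SZ = 17:
 *   MLX5_MPWRQ_WQE_PAGE_ORDER = 17 - 12 = 5  -> 32 pages per WQE
 * strides_per_page = mpwqe_num_strides >> 5:
 *   compression off: 2048 >> 5 = 64 strides per page
 *   compression on:   512 >> 5 = 16 strides per page */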
@@ -230,13 +235,13 @@ mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
                                 struct sk_buff *skb,
                                 struct mlx5e_mpw_info *wi,
                                 u32 page_idx, u32 frag_offset,
                                 u32 len)
 {
-        unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+        unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
 
         wi->skbs_frags[page_idx]++;
         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,

@@ -245,15 +250,15 @@ mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_fragmented_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
                                     struct sk_buff *skb,
                                     struct mlx5e_mpw_info *wi,
                                     u32 page_idx, u32 frag_offset,
                                     u32 len)
 {
-        unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+        unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
 
-        dma_sync_single_for_cpu(pdev,
+        dma_sync_single_for_cpu(rq->pdev,
                                 wi->umr.dma_info[page_idx].addr + frag_offset,
                                 len, DMA_FROM_DEVICE);
         wi->skbs_frags[page_idx]++;
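Rounding truesize up to the stride size reflects that a fragment owns its whole stride, not just the received bytes. Illustrative values (assuming the defaults in this patch):

/* truesize = ALIGN(len, mpwqe_stride_sz):
 *   len = 1400, stride =  64 -> truesize = 1408
 *   len = 1400, stride = 256 -> truesize = 1536
 * So CQE compression's larger strides charge sockets slightly more
 * memory per fragment. */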
@@ -293,7 +298,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
         skb_copy_to_linear_data_offset(skb, 0,
                                        page_address(dma_info->page) + offset,
                                        len);
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
         if (unlikely(offset + headlen > PAGE_SIZE)) {
                 dma_info++;
                 headlen_pg = len;

@@ -304,7 +308,6 @@
                                        page_address(dma_info->page),
                                        len);
         }
-#endif
 }
 
 static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)

@@ -430,7 +433,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                 if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
                         goto err_unmap;
-                atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+                atomic_add(mlx5e_mpwqe_strides_per_page(rq),
                            &wi->umr.dma_info[i].page->_count);
                 wi->skbs_frags[i] = 0;
         }

@@ -449,7 +452,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
         while (--i >= 0) {
                 dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
                                PCI_DMA_FROMDEVICE);
-                atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE,
+                atomic_sub(mlx5e_mpwqe_strides_per_page(rq),
                            &wi->umr.dma_info[i].page->_count);
                 put_page(wi->umr.dma_info[i].page);
         }

@@ -474,7 +477,7 @@ void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                 dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
                                PCI_DMA_FROMDEVICE);
-                atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+                atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
                            &wi->umr.dma_info[i].page->_count);
                 put_page(wi->umr.dma_info[i].page);
         }

@@ -524,7 +527,7 @@ static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
          */
         split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-                atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+                atomic_add(mlx5e_mpwqe_strides_per_page(rq),
                            &wi->dma_info.page[i]._count);
                 wi->skbs_frags[i] = 0;
         }

@@ -548,7 +551,7 @@ void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
         dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
                        PCI_DMA_FROMDEVICE);
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-                atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+                atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
                            &wi->dma_info.page[i]._count);
                 put_page(&wi->dma_info.page[i]);
         }

@@ -793,29 +796,27 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
                                            u32 cqe_bcnt,
                                            struct sk_buff *skb)
 {
-        u32 consumed_bytes = ALIGN(cqe_bcnt, MLX5_MPWRQ_STRIDE_SIZE);
+        u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
         u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
-        u32 wqe_offset = stride_ix * MLX5_MPWRQ_STRIDE_SIZE;
+        u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz;
         u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
         u32 page_idx = wqe_offset >> PAGE_SHIFT;
         u32 head_page_idx = page_idx;
         u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt);
         u32 frag_offset = head_offset + headlen;
         u16 byte_cnt = cqe_bcnt - headlen;
 
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
         if (unlikely(frag_offset >= PAGE_SIZE)) {
                 page_idx++;
                 frag_offset -= PAGE_SIZE;
         }
-#endif
         wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);
 
         while (byte_cnt) {
                 u32 pg_consumed_bytes =
                         min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
 
-                wi->add_skb_frag(rq->pdev, skb, wi, page_idx, frag_offset,
+                wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
                                  pg_consumed_bytes);
                 byte_cnt -= pg_consumed_bytes;
                 frag_offset = 0;
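The stride index reported in the CQE pinpoints the packet inside the multi-page WQE buffer; the page index and intra-page offset then fall out of shifts and masks. A worked example under the CQE-compression geometry (256B strides, 4KB pages; the numbers are illustrative):

/* stride_ix = 100, mpwqe_stride_sz = 256:
 *   wqe_offset  = 100 * 256             = 25600
 *   page_idx    = 25600 >> PAGE_SHIFT   = 6
 *   head_offset = 25600 & (PAGE_SIZE-1) = 1024
 * Up to MLX5_MPWRQ_SMALL_PACKET_THRESHOLD bytes go to the skb linear
 * part; the rest is attached as page frags, crossing page boundaries
 * as needed. */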
@@ -865,7 +866,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
         mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
 mpwrq_cqe_out:
-        if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES))
+        if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
                 return;
 
         wi->free_wqe(rq, wi);
