Commit 7219ab3

Tariq Toukan authored and davem330 committed
net/mlx5e: CQE compression
The CQE compression feature is meant to save PCIe bandwidth by compressing several CQEs into a smaller number of bytes on the PCIe bus. CQE compression can be selectively enabled per CQ. It is disabled by default for now and will be enabled later on.

Signed-off-by: Tariq Toukan <[email protected]>
Signed-off-by: Eugenia Emantayev <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent c1869d5 commit 7219ab3

File tree

6 files changed: +211, -2 lines
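
To make the idea concrete before the per-file diffs: a compressed block on the CQ is a "title" CQE that carries the fields shared by all packets in the block plus the packet count, followed by 64-byte slots each holding up to 8 mini CQEs with only the per-packet fields (byte count, checksum). The driver expands each mini CQE back into a full completion and feeds it to the same RX handler used for uncompressed CQEs. The standalone C sketch below models that expansion only; the struct layouts and names (mini_cqe, title_cqe, decompress_block, handle_rx) are simplified stand-ins for illustration, not the kernel's mlx5_cqe64/mlx5_mini_cqe8 definitions or the driver's actual code.

/* Simplified sketch of CQE decompression, under the assumptions above. */
#include <stdint.h>
#include <stdio.h>

#define MINI_CQE_ARRAY_SIZE 8        /* mini CQEs per 64-byte slot, as in the patch */

struct mini_cqe {                    /* per-packet fields only */
        uint32_t byte_cnt;
        uint16_t checksum;
};

struct title_cqe {                   /* fields shared by the whole block */
        uint32_t byte_cnt;           /* while compressed: number of packets in the block */
        uint16_t checksum;
        uint16_t wqe_counter;
};

static void handle_rx(const struct title_cqe *cqe)
{
        /* stand-in for the driver's per-packet RX completion handler */
        printf("pkt: %u bytes, csum 0x%04x, wqe %u\n",
               (unsigned)cqe->byte_cnt, (unsigned)cqe->checksum,
               (unsigned)cqe->wqe_counter);
}

/* Expand one compressed block: replay the title once per mini CQE,
 * patching in the per-packet fields and advancing the WQE counter. */
static void decompress_block(struct title_cqe title, const struct mini_cqe *minis)
{
        uint32_t pkts = title.byte_cnt;      /* packet count rides in byte_cnt */
        uint16_t wqe_counter = title.wqe_counter;

        for (uint32_t i = 0; i < pkts; i++) {
                title.byte_cnt    = minis[i].byte_cnt;
                title.checksum    = minis[i].checksum;
                title.wqe_counter = wqe_counter++;
                handle_rx(&title);           /* same path as uncompressed CQEs */
        }
}

int main(void)
{
        struct mini_cqe minis[3] = {
                { .byte_cnt = 1500, .checksum = 0x1111 },
                { .byte_cnt =   64, .checksum = 0x2222 },
                { .byte_cnt = 9000, .checksum = 0x3333 },
        };
        struct title_cqe title = { .byte_cnt = 3, .checksum = 0, .wqe_counter = 10 };

        decompress_block(title, minis);
        return 0;
}

The real driver additionally rewrites the op_own/ownership bits of the consumed CQ slots (mlx5e_cqes_update_owner) and spreads decompression across NAPI budgets, which this sketch omits.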

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 10 additions & 0 deletions
@@ -157,6 +157,8 @@ struct mlx5e_params {
         u8  log_rq_size;
         u16 num_channels;
         u8  num_tc;
+        bool rx_cqe_compress_admin;
+        bool rx_cqe_compress;
         u16 rx_cq_moderation_usec;
         u16 rx_cq_moderation_pkts;
         u16 tx_cq_moderation_usec;
@@ -202,6 +204,13 @@ struct mlx5e_cq {
         struct mlx5e_channel      *channel;
         struct mlx5e_priv         *priv;
 
+        /* cqe decompression */
+        struct mlx5_cqe64          title;
+        struct mlx5_mini_cqe8      mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
+        u8                         mini_arr_idx;
+        u16                        decmprs_left;
+        u16                        decmprs_wqe_counter;
+
         /* control */
         struct mlx5_wq_ctrl        wq_ctrl;
 } ____cacheline_aligned_in_smp;
@@ -616,6 +625,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv);
 void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
 int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr);
 int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr);
+void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val);
 
 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
                           u16 vid);

drivers/net/ethernet/mellanox/mlx5/core/en_clock.c

Lines changed: 4 additions & 0 deletions
@@ -93,6 +93,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
         /* RX HW timestamp */
         switch (config.rx_filter) {
         case HWTSTAMP_FILTER_NONE:
+                /* Reset CQE compression to Admin default */
+                mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin);
                 break;
         case HWTSTAMP_FILTER_ALL:
         case HWTSTAMP_FILTER_SOME:
@@ -108,6 +110,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
         case HWTSTAMP_FILTER_PTP_V2_EVENT:
         case HWTSTAMP_FILTER_PTP_V2_SYNC:
         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+                /* Disable CQE compression */
+                mlx5e_modify_rx_cqe_compression(priv, false);
                 config.rx_filter = HWTSTAMP_FILTER_ALL;
                 break;
         default:

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 6 additions & 0 deletions
@@ -114,6 +114,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
                 s->rx_mpwqe_filler   += rq_stats->mpwqe_filler;
                 s->rx_mpwqe_frag     += rq_stats->mpwqe_frag;
                 s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
+                s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+                s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
 
                 for (j = 0; j < priv->params.num_tc; j++) {
                         sq_stats = &priv->channel[i]->sq[j].stats;
@@ -1204,6 +1206,10 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
         }
 
         MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+        if (priv->params.rx_cqe_compress) {
+                MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
+                MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+        }
 
         mlx5e_build_common_cq_param(priv, param);
 }

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 149 additions & 2 deletions
@@ -42,6 +42,143 @@ static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
         return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
 }
 
+static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
+                                       void *data)
+{
+        u32 ci = cqcc & cq->wq.sz_m1;
+
+        memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
+}
+
+static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
+                                         struct mlx5e_cq *cq, u32 cqcc)
+{
+        mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
+        cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
+        cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
+        rq->stats.cqe_compress_blks++;
+}
+
+static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
+{
+        mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
+        cq->mini_arr_idx = 0;
+}
+
+static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
+{
+        u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
+        u32 wq_sz = 1 << cq->wq.log_sz;
+        u32 ci = cqcc & cq->wq.sz_m1;
+        u32 ci_top = min_t(u32, wq_sz, ci + n);
+
+        for (; ci < ci_top; ci++, n--) {
+                struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+                cqe->op_own = op_own;
+        }
+
+        if (unlikely(ci == wq_sz)) {
+                op_own = !op_own;
+                for (ci = 0; ci < n; ci++) {
+                        struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+                        cqe->op_own = op_own;
+                }
+        }
+}
+
+static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
+                                        struct mlx5e_cq *cq, u32 cqcc)
+{
+        u16 wqe_cnt_step;
+
+        cq->title.byte_cnt  = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
+        cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
+        cq->title.op_own   &= 0xf0;
+        cq->title.op_own   |= 0x01 & (cqcc >> cq->wq.log_sz);
+        cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
+
+        wqe_cnt_step =
+                rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+                mpwrq_get_cqe_consumed_strides(&cq->title) : 1;
+        cq->decmprs_wqe_counter =
+                (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1;
+}
+
+static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
+                                                struct mlx5e_cq *cq, u32 cqcc)
+{
+        mlx5e_decompress_cqe(rq, cq, cqcc);
+        cq->title.rss_hash_type   = 0;
+        cq->title.rss_hash_result = 0;
+}
+
+static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
+                                             struct mlx5e_cq *cq,
+                                             int update_owner_only,
+                                             int budget_rem)
+{
+        u32 cqcc = cq->wq.cc + update_owner_only;
+        u32 cqe_count;
+        u32 i;
+
+        cqe_count = min_t(u32, cq->decmprs_left, budget_rem);
+
+        for (i = update_owner_only; i < cqe_count;
+             i++, cq->mini_arr_idx++, cqcc++) {
+                if (unlikely(cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE))
+                        mlx5e_read_mini_arr_slot(cq, cqcc);
+
+                mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
+                rq->handle_rx_cqe(rq, &cq->title);
+        }
+        mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
+        cq->wq.cc = cqcc;
+        cq->decmprs_left -= cqe_count;
+        rq->stats.cqe_compress_pkts += cqe_count;
+
+        return cqe_count;
+}
+
+static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
+                                              struct mlx5e_cq *cq,
+                                              int budget_rem)
+{
+        mlx5e_read_title_slot(rq, cq, cq->wq.cc);
+        mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
+        mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
+        rq->handle_rx_cqe(rq, &cq->title);
+        cq->mini_arr_idx++;
+
+        return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
+}
+
+void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val)
+{
+        bool was_opened;
+
+        if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
+                return;
+
+        mutex_lock(&priv->state_lock);
+
+        if (priv->params.rx_cqe_compress == val)
+                goto unlock;
+
+        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+        if (was_opened)
+                mlx5e_close_locked(priv->netdev);
+
+        priv->params.rx_cqe_compress = val;
+
+        if (was_opened)
+                mlx5e_open_locked(priv->netdev);
+
+unlock:
+        mutex_unlock(&priv->state_lock);
+}
+
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 {
         struct sk_buff *skb;
@@ -738,14 +875,24 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
         struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-        int work_done;
+        int work_done = 0;
 
-        for (work_done = 0; work_done < budget; work_done++) {
+        if (cq->decmprs_left)
+                work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
+
+        for (; work_done < budget; work_done++) {
                 struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);
 
                 if (!cqe)
                         break;
 
+                if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
+                        work_done +=
+                                mlx5e_decompress_cqes_start(rq, cq,
+                                                            budget - work_done);
+                        continue;
+                }
+
                 mlx5_cqwq_pop(&cq->wq);
 
                 rq->handle_rx_cqe(rq, cqe);

drivers/net/ethernet/mellanox/mlx5/core/en_stats.h

Lines changed: 8 additions & 0 deletions
@@ -72,6 +72,8 @@ struct mlx5e_sw_stats {
         u64 rx_mpwqe_filler;
         u64 rx_mpwqe_frag;
         u64 rx_buff_alloc_err;
+        u64 rx_cqe_compress_blks;
+        u64 rx_cqe_compress_pkts;
 
         /* Special handling counters */
         u64 link_down_events;
@@ -101,6 +103,8 @@ static const struct counter_desc sw_stats_desc[] = {
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) },
 };
 
@@ -283,6 +287,8 @@ struct mlx5e_rq_stats {
         u64 mpwqe_filler;
         u64 mpwqe_frag;
         u64 buff_alloc_err;
+        u64 cqe_compress_blks;
+        u64 cqe_compress_pkts;
 };
 
 static const struct counter_desc rq_stats_desc[] = {
@@ -297,6 +303,8 @@ static const struct counter_desc rq_stats_desc[] = {
         { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
         { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
         { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
 };
 
 struct mlx5e_sq_stats {

include/linux/mlx5/device.h

Lines changed: 34 additions & 0 deletions
@@ -685,6 +685,40 @@ struct mlx5_cqe64 {
         u8              op_own;
 };
 
+struct mlx5_mini_cqe8 {
+        union {
+                __be32 rx_hash_result;
+                struct {
+                        __be16 checksum;
+                        __be16 rsvd;
+                };
+                struct {
+                        __be16 wqe_counter;
+                        u8     s_wqe_opcode;
+                        u8     reserved;
+                } s_wqe_info;
+        };
+        __be32 byte_cnt;
+};
+
+enum {
+        MLX5_NO_INLINE_DATA,
+        MLX5_INLINE_DATA32_SEG,
+        MLX5_INLINE_DATA64_SEG,
+        MLX5_COMPRESSED,
+};
+
+enum {
+        MLX5_CQE_FORMAT_CSUM = 0x1,
+};
+
+#define MLX5_MINI_CQE_ARRAY_SIZE 8
+
+static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
+{
+        return (cqe->op_own >> 2) & 0x3;
+}
+
 static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
 {
         return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
