Skip to content

Commit 26abf15

Browse files
committed
Merge tag 'mlx5-updates-2022-01-06' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says:

====================
mlx5-updates-2022-01-06

1) Expose FEC per lane block counters via ethtool
2) Trivial fixes/updates/cleanup to mlx5e netdev driver
3) Fix htmldoc build warning
4) Spread mlx5 SFs (sub-functions) to all available CPU cores: Commits 1..5

Shay Drory Says:
================
Before this patchset, mlx5 subfunctions shared the same IRQs (MSI-X) with their peer subfunctions, causing them to use the same CPU cores.

In large scale, this is very undesirable: SFs use a small number of CPU cores and all of them will be packed on the same CPU cores, not utilizing all CPU cores in the system.

In this patchset we want to achieve two things.
a) Spread IRQs used by SFs to all CPU cores.
b) Pack fewer SFs in the same IRQ, which will result in multiple IRQs per core.

In this patchset, we spread SFs over all online CPUs available to mlx5 IRQs in a Round-Robin manner. e.g.: Whenever a SF is created, pick the next CPU core with the least number of SF IRQs bound to it. SFs will share IRQs on the same core until a certain limit; when such limit is reached, we request a new IRQ and add it to that CPU core's IRQ pool; when out of IRQs, pick any IRQ with the least number of SF users.

This enhancement is done in order to achieve a better distribution of the SFs over all the available CPUs, which reduces application latency, as shown below.

Machine details:
Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz with 56 cores.
PCI Express 3 with BW of 126 Gb/s.
ConnectX-5 Ex; EDR IB (100Gb/s) and 100GbE; dual-port QSFP28; PCIe4.0 x16.

Base line test description:
Single SF on the system. One instance of netperf is running on top of the SF.
Numbers: latency = 15.136 usec, CPU Util = 35%

Test description:
There are 250 SFs on the system. There are 3 instances of netperf running, on top of three different SFs, in parallel.
Perf numbers:
 # netperf     SFs         latency(usec)     latency    CPU utilization
   affinity    affinity    (lower is better) increase %
 1 cpu=0       cpu={0}     ~23 (app 1-3)     35%        75%
 2 cpu=0,2,4   cpu={0}     app 1: 21.625     30%        68% (CPU 0)
                           app 2-3: 16.5     9%         15% (CPU 2,4)
 3 cpu=0       cpu={0,2,4} app 1: ~16        7%         84% (CPU 0)
                           app 2-3: ~17.9    14%        22% (CPU 2,4)
 4 cpu=0,2,4   cpu={0,2,4} 15.2 (app 1-3)    0%         33% (CPU 0,2,4)

 - The first two entries (#1 and #2) show the current state. e.g.: SFs are using the same CPU. The last two entries (#3 and #4) show the latency reduction improvement of this patch. e.g.: SFs are on different CPUs.
 - Whenever we use several CPUs, in case there is a different CPU utilization, write the utilization of each CPU separately.
 - Whenever the latency results of the netperf instances were different, write the latency of each netperf instance separately.

Commands:
 - for netperf CPU=0:
$ for i in {1..3}; do taskset -c 0 netperf -H 1${i}.1.1.1 -t TCP_RR -- \
  -o RT_LATENCY -r8 & done

 - for netperf CPU=0,2,4
$ for i in {1..3}; do taskset -c $(( ($i - 1) * 2 )) netperf -H \
  1${i}.1.1.1 -t TCP_RR -- -o RT_LATENCY -r8 & done

================
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents e4a3d6a + 745a130 commit 26abf15

File tree

19 files changed

+675
-236
lines changed

19 files changed

+675
-236
lines changed

Documentation/networking/devlink/mlx5.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Parameters
1717
- Validation
1818
* - ``enable_roce``
1919
- driverinit
20+
- Type: Boolean
2021
* - ``io_eq_size``
2122
- driverinit
2223
- The range is between 64 and 4096.

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1541,16 +1541,10 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
15411541

15421542
eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
15431543
param = (struct mlx5_eq_param) {
1544-
.irq_index = MLX5_IRQ_EQ_CTRL,
15451544
.nent = MLX5_IB_NUM_PF_EQE,
15461545
};
15471546
param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
1548-
if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
1549-
err = -ENOMEM;
1550-
goto err_wq;
1551-
}
15521547
eq->core = mlx5_eq_create_generic(dev->mdev, &param);
1553-
free_cpumask_var(param.affinity);
15541548
if (IS_ERR(eq->core)) {
15551549
err = PTR_ERR(eq->core);
15561550
goto err_wq;

drivers/net/ethernet/mellanox/mlx5/core/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
109109
#
110110
# SF device
111111
#
112-
mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o
112+
mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o
113113

114114
#
115115
# SF manager

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -984,7 +984,7 @@ struct mlx5e_profile {
984984
};
985985

986986
#define mlx5e_profile_feature_cap(profile, feature) \
987-
((profile)->features & (MLX5E_PROFILE_FEATURE_## feature))
987+
((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))
988988

989989
void mlx5e_build_ptys2ethtool_map(void);
990990

drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
120120
cancel_delayed_work_sync(&priv->stats_agent.work);
121121
}
122122

123-
int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
123+
void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
124124
{
125125
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
126126
struct mlx5_hv_vhca_agent *agent;
127127

128128
priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
129129
if (!priv->stats_agent.buf)
130-
return -ENOMEM;
130+
return;
131131

132132
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
133133
MLX5_HV_VHCA_AGENT_STATS,
@@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
142142
PTR_ERR(agent));
143143

144144
kvfree(priv->stats_agent.buf);
145-
return IS_ERR_OR_NULL(agent);
145+
return;
146146
}
147147

148148
priv->stats_agent.agent = agent;
149149
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
150-
151-
return 0;
152150
}
153151

154152
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)

drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,12 @@
77

88
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
99

10-
int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
10+
void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
1111
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
1212

1313
#else
14-
15-
static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
16-
{
17-
return 0;
18-
}
19-
20-
static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
21-
{
22-
}
14+
static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
15+
static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
2316
#endif
2417

2518
#endif /* __MLX5_EN_STATS_VHCA_H__ */

drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,10 @@ verify_uplink_forwarding(struct mlx5e_priv *priv,
4545
termination_table_raw_traffic)) {
4646
NL_SET_ERR_MSG_MOD(extack,
4747
"devices are both uplink, can't offload forwarding");
48-
pr_err("devices %s %s are both uplink, can't offload forwarding\n",
49-
priv->netdev->name, out_dev->name);
5048
return -EOPNOTSUPP;
5149
} else if (out_dev != rep_priv->netdev) {
5250
NL_SET_ERR_MSG_MOD(extack,
5351
"devices are not the same uplink, can't offload forwarding");
54-
pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
55-
priv->netdev->name, out_dev->name);
5652
return -EOPNOTSUPP;
5753
}
5854
return 0;
@@ -160,10 +156,6 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
160156
}
161157

162158
NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
163-
netdev_warn(priv->netdev,
164-
"devices %s %s not on same switch HW, can't offload forwarding\n",
165-
netdev_name(priv->netdev),
166-
out_dev->name);
167159

168160
return false;
169161
}

drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,24 +1883,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
18831883
bool is_rx_cq)
18841884
{
18851885
struct mlx5e_priv *priv = netdev_priv(netdev);
1886-
struct mlx5_core_dev *mdev = priv->mdev;
1887-
struct mlx5e_params new_params;
1888-
bool mode_changed;
18891886
u8 cq_period_mode, current_cq_period_mode;
1887+
struct mlx5e_params new_params;
1888+
1889+
if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1890+
return -EOPNOTSUPP;
1891+
1892+
cq_period_mode = cqe_mode_to_period_mode(enable);
18901893

1891-
cq_period_mode = enable ?
1892-
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
1893-
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
18941894
current_cq_period_mode = is_rx_cq ?
18951895
priv->channels.params.rx_cq_moderation.cq_period_mode :
18961896
priv->channels.params.tx_cq_moderation.cq_period_mode;
1897-
mode_changed = cq_period_mode != current_cq_period_mode;
1898-
1899-
if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
1900-
!MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
1901-
return -EOPNOTSUPP;
19021897

1903-
if (!mode_changed)
1898+
if (cq_period_mode == current_cq_period_mode)
19041899
return 0;
19051900

19061901
new_params = priv->channels.params;

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3605,11 +3605,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
36053605
new_params = priv->channels.params;
36063606

36073607
if (enable) {
3608-
if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
3609-
netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n");
3610-
err = -EINVAL;
3611-
goto out;
3612-
}
36133608
new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
36143609
new_params.packet_merge.shampo.match_criteria_type =
36153610
MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
@@ -3871,6 +3866,11 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
38713866
features &= ~NETIF_F_RXHASH;
38723867
if (netdev->features & NETIF_F_RXHASH)
38733868
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
3869+
3870+
if (features & NETIF_F_GRO_HW) {
3871+
netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
3872+
features &= ~NETIF_F_GRO_HW;
3873+
}
38743874
}
38753875

38763876
if (mlx5e_is_uplink_rep(priv))

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,6 +1603,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
16031603
}
16041604
}
16051605

1606+
static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1607+
{
1608+
trigger_report(rq, cqe);
1609+
rq->stats->wqe_err++;
1610+
}
1611+
16061612
static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
16071613
{
16081614
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
@@ -1616,8 +1622,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
16161622
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
16171623

16181624
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1619-
trigger_report(rq, cqe);
1620-
rq->stats->wqe_err++;
1625+
mlx5e_handle_rx_err_cqe(rq, cqe);
16211626
goto free_wqe;
16221627
}
16231628

@@ -1670,7 +1675,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
16701675
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
16711676

16721677
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1673-
rq->stats->wqe_err++;
1678+
mlx5e_handle_rx_err_cqe(rq, cqe);
16741679
goto free_wqe;
16751680
}
16761681

@@ -1719,8 +1724,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
17191724
wi->consumed_strides += cstrides;
17201725

17211726
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1722-
trigger_report(rq, cqe);
1723-
rq->stats->wqe_err++;
1727+
mlx5e_handle_rx_err_cqe(rq, cqe);
17241728
goto mpwrq_cqe_out;
17251729
}
17261730

@@ -1988,8 +1992,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
19881992
wi->consumed_strides += cstrides;
19891993

19901994
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1991-
trigger_report(rq, cqe);
1992-
stats->wqe_err++;
1995+
mlx5e_handle_rx_err_cqe(rq, cqe);
19931996
goto mpwrq_cqe_out;
19941997
}
19951998

@@ -2058,8 +2061,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
20582061
wi->consumed_strides += cstrides;
20592062

20602063
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
2061-
trigger_report(rq, cqe);
2062-
rq->stats->wqe_err++;
2064+
mlx5e_handle_rx_err_cqe(rq, cqe);
20632065
goto mpwrq_cqe_out;
20642066
}
20652067

drivers/net/ethernet/mellanox/mlx5/core/en_stats.c

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "en_accel/tls.h"
3636
#include "en_accel/en_accel.h"
3737
#include "en/ptp.h"
38+
#include "en/port.h"
3839

3940
static unsigned int stats_grps_num(struct mlx5e_priv *priv)
4041
{
@@ -1158,12 +1159,99 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
11581159
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
11591160
}
11601161

1161-
void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
1162-
struct ethtool_fec_stats *fec_stats)
1162+
static int fec_num_lanes(struct mlx5_core_dev *dev)
1163+
{
1164+
u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {};
1165+
u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {};
1166+
int err;
1167+
1168+
MLX5_SET(pmlp_reg, in, local_port, 1);
1169+
err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
1170+
MLX5_REG_PMLP, 0, 0);
1171+
if (err)
1172+
return 0;
1173+
1174+
return MLX5_GET(pmlp_reg, out, width);
1175+
}
1176+
1177+
static int fec_active_mode(struct mlx5_core_dev *mdev)
1178+
{
1179+
unsigned long fec_active_long;
1180+
u32 fec_active;
1181+
1182+
if (mlx5e_get_fec_mode(mdev, &fec_active, NULL))
1183+
return MLX5E_FEC_NOFEC;
1184+
1185+
fec_active_long = fec_active;
1186+
return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE);
1187+
}
1188+
1189+
#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \
1190+
fec_stats->corrected_blocks.lanes[(idx)] = \
1191+
MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
1192+
fc_fec_corrected_blocks_lane##idx); \
1193+
fec_stats->uncorrectable_blocks.lanes[(idx)] = \
1194+
MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
1195+
fc_fec_uncorrectable_blocks_lane##idx); \
1196+
})
1197+
1198+
static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats,
1199+
u32 *ppcnt, u8 lanes)
1200+
{
1201+
if (lanes > 3) { /* 4 lanes */
1202+
MLX5E_STATS_SET_FEC_BLOCK(3);
1203+
MLX5E_STATS_SET_FEC_BLOCK(2);
1204+
}
1205+
if (lanes > 1) /* 2 lanes */
1206+
MLX5E_STATS_SET_FEC_BLOCK(1);
1207+
if (lanes > 0) /* 1 lane */
1208+
MLX5E_STATS_SET_FEC_BLOCK(0);
1209+
}
1210+
1211+
static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt)
1212+
{
1213+
fec_stats->corrected_blocks.total =
1214+
MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
1215+
rs_fec_corrected_blocks);
1216+
fec_stats->uncorrectable_blocks.total =
1217+
MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
1218+
rs_fec_uncorrectable_blocks);
1219+
}
1220+
1221+
static void fec_set_block_stats(struct mlx5e_priv *priv,
1222+
struct ethtool_fec_stats *fec_stats)
1223+
{
1224+
struct mlx5_core_dev *mdev = priv->mdev;
1225+
u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
1226+
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
1227+
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
1228+
int mode = fec_active_mode(mdev);
1229+
1230+
if (mode == MLX5E_FEC_NOFEC)
1231+
return;
1232+
1233+
MLX5_SET(ppcnt_reg, in, local_port, 1);
1234+
MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
1235+
if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0))
1236+
return;
1237+
1238+
switch (mode) {
1239+
case MLX5E_FEC_RS_528_514:
1240+
case MLX5E_FEC_RS_544_514:
1241+
case MLX5E_FEC_LLRS_272_257_1:
1242+
fec_set_rs_stats(fec_stats, out);
1243+
return;
1244+
case MLX5E_FEC_FIRECODE:
1245+
fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev));
1246+
}
1247+
}
1248+
1249+
static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
1250+
struct ethtool_fec_stats *fec_stats)
11631251
{
11641252
u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
11651253
struct mlx5_core_dev *mdev = priv->mdev;
1166-
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
1254+
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
11671255
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
11681256

11691257
if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
@@ -1181,6 +1269,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
11811269
phy_corrected_bits);
11821270
}
11831271

1272+
void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
1273+
struct ethtool_fec_stats *fec_stats)
1274+
{
1275+
fec_set_corrected_bits_total(priv, fec_stats);
1276+
fec_set_block_stats(priv, fec_stats);
1277+
}
1278+
11841279
#define PPORT_ETH_EXT_OFF(c) \
11851280
MLX5_BYTE_OFF(ppcnt_reg, \
11861281
counter_set.eth_extended_cntrs_grp_data_layout.c##_high)

0 commit comments

Comments
 (0)