Skip to content

Commit f84ad5c

Browse files
committed
Merge tag 'mlx5-updates-2023-06-06' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5-updates-2023-06-06 1) Support 4 ports VF LAG, part 2/2 2) Few extra trivial cleanup patches Shay Drory Says: ================ Support 4 ports VF LAG, part 2/2 This series continues the series[1] "Support 4 ports VF LAG, part1/2". This series adds support for 4 ports VF LAG (single FDB E-Switch). This series of patches refactors LAG code that makes assumptions about VF LAG supporting only two ports and then enables 4 ports VF LAG. Patch 1: - Fix for ib rep code Patches 2-5: - Refactors LAG layer. Patches 6-7: - Block LAG types which don't support 4 ports. Patch 8: - Enable 4 ports VF LAG. This series specifically allows HCAs with 4 ports to create a VF LAG with only 4 ports. It is not possible to create a VF LAG with 2 or 3 ports using HCAs that have 4 ports. Currently, the Merged E-Switch feature only supports HCAs with 2 ports. However, upcoming patches will introduce support for HCAs with 4 ports. In order to activate VF LAG a user can execute: devlink dev eswitch set pci/0000:08:00.0 mode switchdev devlink dev eswitch set pci/0000:08:00.1 mode switchdev devlink dev eswitch set pci/0000:08:00.2 mode switchdev devlink dev eswitch set pci/0000:08:00.3 mode switchdev ip link add name bond0 type bond ip link set dev bond0 type bond mode 802.3ad ip link set dev eth2 master bond0 ip link set dev eth3 master bond0 ip link set dev eth4 master bond0 ip link set dev eth5 master bond0 Where eth2, eth3, eth4 and eth5 are net-interfaces of pci/0000:08:00.0 pci/0000:08:00.1 pci/0000:08:00.2 pci/0000:08:00.3 respectively. 
User can verify LAG state and type via debugfs: /sys/kernel/debug/mlx5/0000\:08\:00.0/lag/state /sys/kernel/debug/mlx5/0000\:08\:00.0/lag/type [1] https://lore.kernel.org/netdev/[email protected]/T/#mf1d2083780970ba277bfe721554d4925f03f36d1 ================ * tag 'mlx5-updates-2023-06-06' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux: net/mlx5e: simplify condition after napi budget handling change mlx5/core: E-Switch, Allocate ECPF vport if it's an eswitch manager net/mlx5: Skip inline mode check after mlx5_eswitch_enable_locked() failure net/mlx5e: TC, refactor access to hash key net/mlx5e: Remove RX page cache leftovers net/mlx5e: Expose catastrophic steering error counters net/mlx5: Enable 4 ports VF LAG net/mlx5: LAG, block multiport eswitch LAG in case ldev have more than 2 ports net/mlx5: LAG, block multipath LAG in case ldev have more than 2 ports net/mlx5: LAG, change mlx5_shared_fdb_supported() to static net/mlx5: LAG, generalize handling of shared FDB net/mlx5: LAG, check if all eswitches are paired for shared FDB {net/RDMA}/mlx5: introduce lag_for_each_peer RDMA/mlx5: Free second uplink ib port ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 55b2433 + 803ea34 commit f84ad5c

File tree

18 files changed

+201
-103
lines changed

18 files changed

+201
-103
lines changed

Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,13 @@ Description of the vnic counters:
290290
- nic_receive_steering_discard
291291
number of packets that completed RX flow
292292
steering but were discarded due to a mismatch in flow table.
293+
- generated_pkt_steering_fail
294+
number of packets generated by the VNIC experiencing unexpected steering
295+
failure (at any point in steering flow).
296+
- handled_pkt_steering_fail
297+
number of packets handled by the VNIC experiencing unexpected steering
298+
failure (at any point in steering flow owned by the VNIC, including the FDB
299+
for the eswitch owner).
293300

294301
User commands examples:
295302

drivers/infiniband/hw/mlx5/ib_rep.c

Lines changed: 66 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
3030

3131
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
3232

33+
static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
34+
{
35+
struct mlx5_core_dev *peer_dev;
36+
int i;
37+
38+
mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
39+
u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
40+
41+
if (mlx5_lag_is_mpesw(peer_dev))
42+
*num_ports += peer_num_ports;
43+
else
44+
/* Only 1 ib port is the representor for all uplinks */
45+
*num_ports += peer_num_ports - 1;
46+
}
47+
}
48+
3349
static int
3450
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
3551
{
3652
u32 num_ports = mlx5_eswitch_get_total_vports(dev);
53+
struct mlx5_core_dev *lag_master = dev;
3754
const struct mlx5_ib_profile *profile;
3855
struct mlx5_core_dev *peer_dev;
3956
struct mlx5_ib_dev *ibdev;
40-
int second_uplink = false;
41-
u32 peer_num_ports;
57+
int new_uplink = false;
4258
int vport_index;
4359
int ret;
60+
int i;
4461

4562
vport_index = rep->vport_index;
4663

4764
if (mlx5_lag_is_shared_fdb(dev)) {
48-
peer_dev = mlx5_lag_get_peer_mdev(dev);
49-
peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
5065
if (mlx5_lag_is_master(dev)) {
51-
if (mlx5_lag_is_mpesw(dev))
52-
num_ports += peer_num_ports;
53-
else
54-
num_ports += peer_num_ports - 1;
55-
66+
mlx5_ib_num_ports_update(dev, &num_ports);
5667
} else {
5768
if (rep->vport == MLX5_VPORT_UPLINK) {
5869
if (!mlx5_lag_is_mpesw(dev))
5970
return 0;
60-
second_uplink = true;
71+
new_uplink = true;
6172
}
73+
mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
74+
u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
75+
76+
if (mlx5_lag_is_master(peer_dev))
77+
lag_master = peer_dev;
78+
else if (!mlx5_lag_is_mpesw(dev))
79+
/* Only 1 ib port is the representor for all uplinks */
80+
peer_n_ports--;
6281

63-
vport_index += peer_num_ports;
64-
dev = peer_dev;
82+
if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
83+
vport_index += peer_n_ports;
84+
}
6585
}
6686
}
6787

68-
if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
88+
if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
6989
profile = &raw_eth_profile;
7090
else
71-
return mlx5_ib_set_vport_rep(dev, rep, vport_index);
91+
return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
7292

7393
ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
7494
if (!ibdev)
@@ -85,17 +105,17 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
85105
vport_index = rep->vport_index;
86106
ibdev->port[vport_index].rep = rep;
87107
ibdev->port[vport_index].roce.netdev =
88-
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
89-
ibdev->mdev = dev;
108+
mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
109+
ibdev->mdev = lag_master;
90110
ibdev->num_ports = num_ports;
91111

92112
ret = __mlx5_ib_add(ibdev, profile);
93113
if (ret)
94114
goto fail_add;
95115

96116
rep->rep_data[REP_IB].priv = ibdev;
97-
if (mlx5_lag_is_shared_fdb(dev))
98-
mlx5_ib_register_peer_vport_reps(dev);
117+
if (mlx5_lag_is_shared_fdb(lag_master))
118+
mlx5_ib_register_peer_vport_reps(lag_master);
99119

100120
return 0;
101121

@@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
118138
struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
119139
int vport_index = rep->vport_index;
120140
struct mlx5_ib_port *port;
141+
int i;
121142

122143
if (WARN_ON(!mdev))
123144
return;
124145

146+
if (!dev)
147+
return;
148+
125149
if (mlx5_lag_is_shared_fdb(mdev) &&
126150
!mlx5_lag_is_master(mdev)) {
127-
struct mlx5_core_dev *peer_mdev;
128-
129-
if (rep->vport == MLX5_VPORT_UPLINK)
151+
if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
130152
return;
131-
peer_mdev = mlx5_lag_get_peer_mdev(mdev);
132-
vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
153+
for (i = 0; i < dev->num_ports; i++) {
154+
if (dev->port[i].rep == rep)
155+
break;
156+
}
157+
if (WARN_ON(i == dev->num_ports))
158+
return;
159+
vport_index = i;
133160
}
134161

135-
if (!dev)
136-
return;
137-
138162
port = &dev->port[vport_index];
139163
write_lock(&port->roce.netdev_lock);
140164
port->roce.netdev = NULL;
@@ -143,13 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
143167
port->rep = NULL;
144168

145169
if (rep->vport == MLX5_VPORT_UPLINK) {
146-
struct mlx5_core_dev *peer_mdev;
147-
struct mlx5_eswitch *esw;
170+
171+
if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
172+
return;
148173

149174
if (mlx5_lag_is_shared_fdb(mdev)) {
150-
peer_mdev = mlx5_lag_get_peer_mdev(mdev);
151-
esw = peer_mdev->priv.eswitch;
152-
mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
175+
struct mlx5_core_dev *peer_mdev;
176+
struct mlx5_eswitch *esw;
177+
178+
mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
179+
esw = peer_mdev->priv.eswitch;
180+
mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
181+
}
153182
}
154183
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
155184
}
@@ -163,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
163192

164193
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
165194
{
166-
struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
195+
struct mlx5_core_dev *peer_mdev;
167196
struct mlx5_eswitch *esw;
197+
int i;
168198

169-
if (!peer_mdev)
170-
return;
171-
172-
esw = peer_mdev->priv.eswitch;
173-
mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
199+
mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
200+
esw = peer_mdev->priv.eswitch;
201+
mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
202+
}
174203
}
175204

176205
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,

drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
7676
if (err)
7777
return err;
7878

79+
err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
80+
VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail));
81+
if (err)
82+
return err;
83+
84+
err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
85+
VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
86+
if (err)
87+
return err;
88+
7989
err = devlink_fmsg_obj_nest_end(fmsg);
8090
if (err)
8191
return err;

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -594,13 +594,6 @@ struct mlx5e_mpw_info {
594594

595595
#define MLX5E_MAX_RX_FRAGS 4
596596

597-
/* a single cache unit is capable to serve one napi call (for non-striding rq)
598-
* or a MPWQE (for striding rq).
599-
*/
600-
#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
601-
MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
602-
#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
603-
604597
struct mlx5e_rq;
605598
typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
606599
typedef struct sk_buff *

drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ struct mlx5e_tc_act_stats {
2525

2626
static const struct rhashtable_params act_counters_ht_params = {
2727
.head_offset = offsetof(struct mlx5e_tc_act_stats, hash),
28-
.key_offset = 0,
29-
.key_len = offsetof(struct mlx5e_tc_act_stats, counter),
28+
.key_offset = offsetof(struct mlx5e_tc_act_stats, tc_act_cookie),
29+
.key_len = sizeof_field(struct mlx5e_tc_act_stats, tc_act_cookie),
3030
.automatic_shrinking = true,
3131
};
3232

@@ -169,14 +169,11 @@ mlx5e_tc_act_stats_fill_stats(struct mlx5e_tc_act_stats_handle *handle,
169169
{
170170
struct rhashtable *ht = &handle->ht;
171171
struct mlx5e_tc_act_stats *item;
172-
struct mlx5e_tc_act_stats key;
173172
u64 pkts, bytes, lastused;
174173
int err = 0;
175174

176-
key.tc_act_cookie = fl_act->cookie;
177-
178175
rcu_read_lock();
179-
item = rhashtable_lookup(ht, &key, act_counters_ht_params);
176+
item = rhashtable_lookup(ht, &fl_act->cookie, act_counters_ht_params);
180177
if (!item) {
181178
rcu_read_unlock();
182179
err = -ENOENT;

drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
207207
}
208208
ch_stats->aff_change++;
209209
aff_change = true;
210-
if (budget && work_done == budget)
210+
if (work_done == budget)
211211
work_done--;
212212
}
213213

drivers/net/ethernet/mellanox/mlx5/core/eswitch.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1601,7 +1601,8 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
16011601
idx++;
16021602
}
16031603

1604-
if (mlx5_ecpf_vport_exists(dev)) {
1604+
if (mlx5_ecpf_vport_exists(dev) ||
1605+
mlx5_core_is_ecpf_esw_manager(dev)) {
16051606
err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF);
16061607
if (err)
16071608
goto err;

drivers/net/ethernet/mellanox/mlx5/core/eswitch.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,13 @@ static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
779779
return 0;
780780
}
781781

782+
static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw)
783+
{
784+
if (mlx5_esw_allowed(esw))
785+
return esw->num_peers;
786+
return 0;
787+
}
788+
782789
static inline struct mlx5_flow_table *
783790
mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
784791
{
@@ -826,6 +833,8 @@ static inline void
826833
mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
827834
struct mlx5_eswitch *slave_esw) {}
828835

836+
static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; }
837+
829838
static inline int
830839
mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
831840
{

drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2178,6 +2178,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
21782178
"Failed setting eswitch to offloads");
21792179
esw->mode = MLX5_ESWITCH_LEGACY;
21802180
mlx5_rescan_drivers(esw->dev);
2181+
return err;
21812182
}
21822183
if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
21832184
if (mlx5_eswitch_inline_mode_get(esw,
@@ -2187,7 +2188,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
21872188
"Inline mode is different between vports");
21882189
}
21892190
}
2190-
return err;
2191+
return 0;
21912192
}
21922193

21932194
static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw,

drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -244,16 +244,22 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
244244
ft->type == FS_FT_FDB &&
245245
mlx5_lag_is_shared_fdb(dev) &&
246246
mlx5_lag_is_master(dev)) {
247-
err = mlx5_cmd_set_slave_root_fdb(dev,
248-
mlx5_lag_get_peer_mdev(dev),
249-
!disconnect, (!disconnect) ?
250-
ft->id : 0);
251-
if (err && !disconnect) {
252-
MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
253-
MLX5_SET(set_flow_table_root_in, in, table_id,
254-
ns->root_ft->id);
255-
mlx5_cmd_exec_in(dev, set_flow_table_root, in);
247+
struct mlx5_core_dev *peer_dev;
248+
int i;
249+
250+
mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
251+
err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect,
252+
(!disconnect) ? ft->id : 0);
253+
if (err && !disconnect) {
254+
MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
255+
MLX5_SET(set_flow_table_root_in, in, table_id,
256+
ns->root_ft->id);
257+
mlx5_cmd_exec_in(dev, set_flow_table_root, in);
258+
}
259+
if (err)
260+
break;
256261
}
262+
257263
}
258264

259265
return err;

0 commit comments

Comments
 (0)