Skip to content

Commit 8d159eb

Browse files
chiarameiohas authored and rleon committed
RDMA/mlx5: Use IB set_netdev and get_netdev functions
The IB layer provides a common interface to store and get net devices associated to an IB device port (ib_device_set_netdev() and ib_device_get_netdev()). Previously, mlx5_ib stored and managed the associated net devices internally. Replace internal net device management in mlx5_ib with ib_device_set_netdev() when attaching/detaching a net device and ib_device_get_netdev() when retrieving the net device. Export ib_device_get_netdev(). For mlx5 representors/PFs/VFs and lag creation we replace the netdev assignments with the IB set/get netdev functions. In active-backup mode lag the active slave net device is stored in the lag itself. To assure the net device stored in a lag bond IB device is the active slave we implement the following: - mlx5_core: when modifying the slave of a bond we send the internal driver event MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE. - mlx5_ib: when catching the event call ib_device_set_netdev() This patch also ensures the correct IB events are sent in switchdev lag. While at it, when in multiport eswitch mode, only a single IB device is created for all ports. The said IB device will receive all netdev events of its VFs once loaded, thus to avoid overwriting the mapping of PF IB device to PF netdev, ignore NETDEV_REGISTER events if the ib device has already been mapped to a netdev. Signed-off-by: Chiara Meiohas <[email protected]> Signed-off-by: Michael Guralnik <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Leon Romanovsky <[email protected]>
1 parent 5f8ca04 commit 8d159eb

File tree

8 files changed

+191
-103
lines changed

8 files changed

+191
-103
lines changed

drivers/infiniband/core/device.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2236,6 +2236,9 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
22362236
if (!rdma_is_port_valid(ib_dev, port))
22372237
return NULL;
22382238

2239+
if (!ib_dev->port_data)
2240+
return NULL;
2241+
22392242
pdata = &ib_dev->port_data[port];
22402243

22412244
/*
@@ -2254,6 +2257,7 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
22542257

22552258
return res;
22562259
}
2260+
EXPORT_SYMBOL(ib_device_get_netdev);
22572261

22582262
/**
22592263
* ib_device_get_by_netdev - Find an IB device associated with a netdev

drivers/infiniband/hw/mlx5/ib_rep.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,17 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
1313
int vport_index)
1414
{
1515
struct mlx5_ib_dev *ibdev;
16+
struct net_device *ndev;
1617

1718
ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
1819
if (!ibdev)
1920
return -EINVAL;
2021

2122
ibdev->port[vport_index].rep = rep;
2223
rep->rep_data[REP_IB].priv = ibdev;
23-
write_lock(&ibdev->port[vport_index].roce.netdev_lock);
24-
ibdev->port[vport_index].roce.netdev =
25-
mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
26-
write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
24+
ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
2725

28-
return 0;
26+
return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
2927
}
3028

3129
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
@@ -104,11 +102,15 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
104102
ibdev->is_rep = true;
105103
vport_index = rep->vport_index;
106104
ibdev->port[vport_index].rep = rep;
107-
ibdev->ib_dev.phys_port_cnt = num_ports;
108-
ibdev->port[vport_index].roce.netdev =
109-
mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
110105
ibdev->mdev = lag_master;
111106
ibdev->num_ports = num_ports;
107+
ibdev->ib_dev.phys_port_cnt = num_ports;
108+
ret = ib_device_set_netdev(&ibdev->ib_dev,
109+
mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
110+
rep->vport),
111+
vport_index + 1);
112+
if (ret)
113+
goto fail_add;
112114

113115
ret = __mlx5_ib_add(ibdev, profile);
114116
if (ret)
@@ -161,9 +163,8 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
161163
}
162164

163165
port = &dev->port[vport_index];
164-
write_lock(&port->roce.netdev_lock);
165-
port->roce.netdev = NULL;
166-
write_unlock(&port->roce.netdev_lock);
166+
167+
ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
167168
rep->rep_data[REP_IB].priv = NULL;
168169
port->rep = NULL;
169170

drivers/infiniband/hw/mlx5/main.c

Lines changed: 129 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -147,16 +147,52 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
147147

148148
if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
149149
continue;
150-
151-
read_lock(&port->roce.netdev_lock);
152-
rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
153-
port->rep->vport);
154-
if (rep_ndev == ndev) {
155-
read_unlock(&port->roce.netdev_lock);
150+
rep_ndev = ib_device_get_netdev(&dev->ib_dev, i + 1);
151+
if (rep_ndev && rep_ndev == ndev) {
152+
dev_put(rep_ndev);
156153
*port_num = i + 1;
157154
return &port->roce;
158155
}
159-
read_unlock(&port->roce.netdev_lock);
156+
157+
dev_put(rep_ndev);
158+
}
159+
160+
return NULL;
161+
}
162+
163+
static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
164+
struct net_device *ndev,
165+
struct net_device *upper,
166+
struct net_device *ib_ndev)
167+
{
168+
if (!dev->ib_active)
169+
return false;
170+
171+
/* Event is about our upper device */
172+
if (upper == ndev)
173+
return true;
174+
175+
/* RDMA device is not in lag and not in switchdev */
176+
if (!dev->is_rep && !upper && ndev == ib_ndev)
177+
return true;
178+
179+
/* RDMA device is in switchdev */
180+
if (dev->is_rep && ndev == ib_ndev)
181+
return true;
182+
183+
return false;
184+
}
185+
186+
static struct net_device *mlx5_ib_get_rep_uplink_netdev(struct mlx5_ib_dev *ibdev)
187+
{
188+
struct mlx5_ib_port *port;
189+
int i;
190+
191+
for (i = 0; i < ibdev->num_ports; i++) {
192+
port = &ibdev->port[i];
193+
if (port->rep && port->rep->vport == MLX5_VPORT_UPLINK) {
194+
return ib_device_get_netdev(&ibdev->ib_dev, i + 1);
195+
}
160196
}
161197

162198
return NULL;
@@ -168,6 +204,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
168204
struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
169205
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
170206
u32 port_num = roce->native_port_num;
207+
struct net_device *ib_ndev = NULL;
171208
struct mlx5_core_dev *mdev;
172209
struct mlx5_ib_dev *ibdev;
173210

@@ -181,29 +218,38 @@ static int mlx5_netdev_event(struct notifier_block *this,
181218
/* Should already be registered during the load */
182219
if (ibdev->is_rep)
183220
break;
184-
write_lock(&roce->netdev_lock);
221+
222+
ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
223+
/* Exit if already registered */
224+
if (ib_ndev)
225+
goto put_ndev;
226+
185227
if (ndev->dev.parent == mdev->device)
186-
roce->netdev = ndev;
187-
write_unlock(&roce->netdev_lock);
228+
ib_device_set_netdev(&ibdev->ib_dev, ndev, port_num);
188229
break;
189230

190231
case NETDEV_UNREGISTER:
191232
/* In case of reps, ib device goes away before the netdevs */
192-
write_lock(&roce->netdev_lock);
193-
if (roce->netdev == ndev)
194-
roce->netdev = NULL;
195-
write_unlock(&roce->netdev_lock);
196-
break;
233+
if (ibdev->is_rep)
234+
break;
235+
ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
236+
if (ib_ndev == ndev)
237+
ib_device_set_netdev(&ibdev->ib_dev, NULL, port_num);
238+
goto put_ndev;
197239

198240
case NETDEV_CHANGE:
199241
case NETDEV_UP:
200242
case NETDEV_DOWN: {
201243
struct net_device *upper = NULL;
202244

203-
if (mlx5_lag_is_roce(mdev)) {
245+
if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
204246
struct net_device *lag_ndev;
205247

206-
lag_ndev = mlx5_lag_get_roce_netdev(mdev);
248+
if(mlx5_lag_is_roce(mdev))
249+
lag_ndev = ib_device_get_netdev(&ibdev->ib_dev, 1);
250+
else /* sriov lag */
251+
lag_ndev = mlx5_ib_get_rep_uplink_netdev(ibdev);
252+
207253
if (lag_ndev) {
208254
upper = netdev_master_upper_dev_get(lag_ndev);
209255
dev_put(lag_ndev);
@@ -216,18 +262,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
216262
roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
217263
if (!roce)
218264
return NOTIFY_DONE;
219-
if ((upper == ndev ||
220-
((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
221-
ibdev->ib_active) {
265+
266+
ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
267+
268+
if (mlx5_netdev_send_event(ibdev, ndev, upper, ib_ndev)) {
222269
struct ib_event ibev = { };
223270
enum ib_port_state port_state;
224271

225272
if (get_port_state(&ibdev->ib_dev, port_num,
226273
&port_state))
227-
goto done;
274+
goto put_ndev;
228275

229276
if (roce->last_port_state == port_state)
230-
goto done;
277+
goto put_ndev;
231278

232279
roce->last_port_state = port_state;
233280
ibev.device = &ibdev->ib_dev;
@@ -236,7 +283,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
236283
else if (port_state == IB_PORT_ACTIVE)
237284
ibev.event = IB_EVENT_PORT_ACTIVE;
238285
else
239-
goto done;
286+
goto put_ndev;
240287

241288
ibev.element.port_num = port_num;
242289
ib_dispatch_event(&ibev);
@@ -247,39 +294,13 @@ static int mlx5_netdev_event(struct notifier_block *this,
247294
default:
248295
break;
249296
}
297+
put_ndev:
298+
dev_put(ib_ndev);
250299
done:
251300
mlx5_ib_put_native_port_mdev(ibdev, port_num);
252301
return NOTIFY_DONE;
253302
}
254303

255-
static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
256-
u32 port_num)
257-
{
258-
struct mlx5_ib_dev *ibdev = to_mdev(device);
259-
struct net_device *ndev;
260-
struct mlx5_core_dev *mdev;
261-
262-
mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
263-
if (!mdev)
264-
return NULL;
265-
266-
if (mlx5_lag_is_roce(mdev)) {
267-
ndev = mlx5_lag_get_roce_netdev(mdev);
268-
goto out;
269-
}
270-
271-
/* Ensure ndev does not disappear before we invoke dev_hold()
272-
*/
273-
read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
274-
ndev = ibdev->port[port_num - 1].roce.netdev;
275-
dev_hold(ndev);
276-
read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
277-
278-
out:
279-
mlx5_ib_put_native_port_mdev(ibdev, port_num);
280-
return ndev;
281-
}
282-
283304
struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
284305
u32 ib_port_num,
285306
u32 *native_port_num)
@@ -554,7 +575,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
554575
if (!put_mdev)
555576
goto out;
556577

557-
ndev = mlx5_ib_get_netdev(device, port_num);
578+
ndev = ib_device_get_netdev(device, port_num);
558579
if (!ndev)
559580
goto out;
560581

@@ -3185,6 +3206,60 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str)
31853206
fw_rev_sub(dev->mdev));
31863207
}
31873208

3209+
static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
3210+
{
3211+
struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
3212+
lag_events);
3213+
struct mlx5_core_dev *mdev = dev->mdev;
3214+
struct mlx5_ib_port *port;
3215+
struct net_device *ndev;
3216+
int i, err;
3217+
int portnum;
3218+
3219+
portnum = 0;
3220+
switch (event) {
3221+
case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
3222+
ndev = data;
3223+
if (ndev) {
3224+
if (!mlx5_lag_is_roce(mdev)) {
3225+
// sriov lag
3226+
for (i = 0; i < dev->num_ports; i++) {
3227+
port = &dev->port[i];
3228+
if (port->rep && port->rep->vport ==
3229+
MLX5_VPORT_UPLINK) {
3230+
portnum = i;
3231+
break;
3232+
}
3233+
}
3234+
}
3235+
err = ib_device_set_netdev(&dev->ib_dev, ndev,
3236+
portnum + 1);
3237+
dev_put(ndev);
3238+
if (err)
3239+
return err;
3240+
/* Rescan gids after new netdev assignment */
3241+
rdma_roce_rescan_device(&dev->ib_dev);
3242+
}
3243+
break;
3244+
default:
3245+
return NOTIFY_DONE;
3246+
}
3247+
return NOTIFY_OK;
3248+
}
3249+
3250+
static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
3251+
{
3252+
dev->lag_events.notifier_call = lag_event;
3253+
blocking_notifier_chain_register(&dev->mdev->priv.lag_nh,
3254+
&dev->lag_events);
3255+
}
3256+
3257+
static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev)
3258+
{
3259+
blocking_notifier_chain_unregister(&dev->mdev->priv.lag_nh,
3260+
&dev->lag_events);
3261+
}
3262+
31883263
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
31893264
{
31903265
struct mlx5_core_dev *mdev = dev->mdev;
@@ -3206,6 +3281,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
32063281
goto err_destroy_vport_lag;
32073282
}
32083283

3284+
mlx5e_lag_event_register(dev);
32093285
dev->flow_db->lag_demux_ft = ft;
32103286
dev->lag_ports = mlx5_lag_get_num_ports(mdev);
32113287
dev->lag_active = true;
@@ -3223,6 +3299,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
32233299
if (dev->lag_active) {
32243300
dev->lag_active = false;
32253301

3302+
mlx5e_lag_event_unregister(dev);
32263303
mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
32273304
dev->flow_db->lag_demux_ft = NULL;
32283305

@@ -3939,7 +4016,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
39394016

39404017
for (i = 0; i < dev->num_ports; i++) {
39414018
spin_lock_init(&dev->port[i].mp.mpi_lock);
3942-
rwlock_init(&dev->port[i].roce.netdev_lock);
39434019
dev->port[i].roce.dev = dev;
39444020
dev->port[i].roce.native_port_num = i + 1;
39454021
dev->port[i].roce.last_port_state = IB_PORT_DOWN;
@@ -4204,7 +4280,6 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
42044280
.create_wq = mlx5_ib_create_wq,
42054281
.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
42064282
.destroy_wq = mlx5_ib_destroy_wq,
4207-
.get_netdev = mlx5_ib_get_netdev,
42084283
.modify_wq = mlx5_ib_modify_wq,
42094284

42104285
INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -888,8 +888,6 @@ struct mlx5_roce {
888888
/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
889889
* netdev pointer
890890
*/
891-
rwlock_t netdev_lock;
892-
struct net_device *netdev;
893891
struct notifier_block nb;
894892
struct netdev_net_notifier nn;
895893
struct notifier_block mdev_nb;
@@ -1138,6 +1136,7 @@ struct mlx5_ib_dev {
11381136
/* protect accessing data_direct_dev */
11391137
struct mutex data_direct_lock;
11401138
struct notifier_block mdev_events;
1139+
struct notifier_block lag_events;
11411140
int num_ports;
11421141
/* serialize update of capability mask
11431142
*/

0 commit comments

Comments
 (0)