@@ -147,16 +147,52 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
 		if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
 			continue;
-
-		read_lock(&port->roce.netdev_lock);
-		rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
-						  port->rep->vport);
-		if (rep_ndev == ndev) {
-			read_unlock(&port->roce.netdev_lock);
+		rep_ndev = ib_device_get_netdev(&dev->ib_dev, i + 1);
+		if (rep_ndev && rep_ndev == ndev) {
+			dev_put(rep_ndev);
 			*port_num = i + 1;
 			return &port->roce;
 		}
-		read_unlock(&port->roce.netdev_lock);
+
+		dev_put(rep_ndev);
+	}
+
+	return NULL;
+}
+
+static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
+				   struct net_device *ndev,
+				   struct net_device *upper,
+				   struct net_device *ib_ndev)
+{
+	if (!dev->ib_active)
+		return false;
+
+	/* Event is about our upper device */
+	if (upper == ndev)
+		return true;
+
+	/* RDMA device is not in lag and not in switchdev */
+	if (!dev->is_rep && !upper && ndev == ib_ndev)
+		return true;
+
+	/* RDMA device is in switchdev */
+	if (dev->is_rep && ndev == ib_ndev)
+		return true;
+
+	return false;
+}
+
+static struct net_device *mlx5_ib_get_rep_uplink_netdev(struct mlx5_ib_dev *ibdev)
+{
+	struct mlx5_ib_port *port;
+	int i;
+
+	for (i = 0; i < ibdev->num_ports; i++) {
+		port = &ibdev->port[i];
+		if (port->rep && port->rep->vport == MLX5_VPORT_UPLINK) {
+			return ib_device_get_netdev(&ibdev->ib_dev, i + 1);
+		}
 	}
 
 	return NULL;
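
The hunk above drops the driver-private roce->netdev pointer and its netdev_lock in favor of the IB core's per-port netdev tracking. Below is a minimal sketch (not part of the patch) of the hold/put convention the new code relies on; the helper name port_ndev_matches() is hypothetical and only illustrates that every ib_device_get_netdev() lookup is paired with dev_put(), which is a no-op on NULL.

/*
 * Illustrative sketch, assuming the surrounding main.c context:
 * ib_device_get_netdev() returns the per-port netdev with a reference
 * held (or NULL), so each lookup is balanced by dev_put().
 */
static bool port_ndev_matches(struct mlx5_ib_dev *dev, u32 port_num,
			      struct net_device *ndev)
{
	struct net_device *ib_ndev;
	bool match;

	ib_ndev = ib_device_get_netdev(&dev->ib_dev, port_num);
	match = (ib_ndev == ndev);
	dev_put(ib_ndev);	/* safe even when the lookup returned NULL */
	return match;
}
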
@@ -168,6 +204,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
 	struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
 	u32 port_num = roce->native_port_num;
+	struct net_device *ib_ndev = NULL;
 	struct mlx5_core_dev *mdev;
 	struct mlx5_ib_dev *ibdev;
 
@@ -181,29 +218,38 @@ static int mlx5_netdev_event(struct notifier_block *this,
 		/* Should already be registered during the load */
 		if (ibdev->is_rep)
 			break;
-		write_lock(&roce->netdev_lock);
+
+		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+		/* Exit if already registered */
+		if (ib_ndev)
+			goto put_ndev;
+
 		if (ndev->dev.parent == mdev->device)
-			roce->netdev = ndev;
-		write_unlock(&roce->netdev_lock);
+			ib_device_set_netdev(&ibdev->ib_dev, ndev, port_num);
 		break;
 
 	case NETDEV_UNREGISTER:
 		/* In case of reps, ib device goes away before the netdevs */
-		write_lock(&roce->netdev_lock);
-		if (roce->netdev == ndev)
-			roce->netdev = NULL;
-		write_unlock(&roce->netdev_lock);
-		break;
+		if (ibdev->is_rep)
+			break;
+		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+		if (ib_ndev == ndev)
+			ib_device_set_netdev(&ibdev->ib_dev, NULL, port_num);
+		goto put_ndev;
 
 	case NETDEV_CHANGE:
 	case NETDEV_UP:
 	case NETDEV_DOWN: {
 		struct net_device *upper = NULL;
 
-		if (mlx5_lag_is_roce(mdev)) {
+		if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
 			struct net_device *lag_ndev;
 
-			lag_ndev = mlx5_lag_get_roce_netdev(mdev);
+			if (mlx5_lag_is_roce(mdev))
+				lag_ndev = ib_device_get_netdev(&ibdev->ib_dev, 1);
+			else /* sriov lag */
+				lag_ndev = mlx5_ib_get_rep_uplink_netdev(ibdev);
+
 			if (lag_ndev) {
 				upper = netdev_master_upper_dev_get(lag_ndev);
 				dev_put(lag_ndev);
@@ -216,18 +262,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
 		roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
 		if (!roce)
 			return NOTIFY_DONE;
-		if ((upper == ndev ||
-		     ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
-		    ibdev->ib_active) {
+
+		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+
+		if (mlx5_netdev_send_event(ibdev, ndev, upper, ib_ndev)) {
 			struct ib_event ibev = { };
 			enum ib_port_state port_state;
 
 			if (get_port_state(&ibdev->ib_dev, port_num,
 					   &port_state))
-				goto done;
+				goto put_ndev;
 
 			if (roce->last_port_state == port_state)
-				goto done;
+				goto put_ndev;
 
 			roce->last_port_state = port_state;
 			ibev.device = &ibdev->ib_dev;
@@ -236,7 +283,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
 			else if (port_state == IB_PORT_ACTIVE)
 				ibev.event = IB_EVENT_PORT_ACTIVE;
 			else
-				goto done;
+				goto put_ndev;
 
 			ibev.element.port_num = port_num;
 			ib_dispatch_event(&ibev);
@@ -247,39 +294,13 @@ static int mlx5_netdev_event(struct notifier_block *this,
 	default:
 		break;
 	}
+put_ndev:
+	dev_put(ib_ndev);
 done:
 	mlx5_ib_put_native_port_mdev(ibdev, port_num);
 	return NOTIFY_DONE;
 }
 
-static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
-					     u32 port_num)
-{
-	struct mlx5_ib_dev *ibdev = to_mdev(device);
-	struct net_device *ndev;
-	struct mlx5_core_dev *mdev;
-
-	mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
-	if (!mdev)
-		return NULL;
-
-	if (mlx5_lag_is_roce(mdev)) {
-		ndev = mlx5_lag_get_roce_netdev(mdev);
-		goto out;
-	}
-
-	/* Ensure ndev does not disappear before we invoke dev_hold()
-	 */
-	read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
-	ndev = ibdev->port[port_num - 1].roce.netdev;
-	dev_hold(ndev);
-	read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
-
-out:
-	mlx5_ib_put_native_port_mdev(ibdev, port_num);
-	return ndev;
-}
-
 struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
 						   u32 ib_port_num,
 						   u32 *native_port_num)
@@ -554,7 +575,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
 	if (!put_mdev)
 		goto out;
 
-	ndev = mlx5_ib_get_netdev(device, port_num);
+	ndev = ib_device_get_netdev(device, port_num);
 	if (!ndev)
 		goto out;
 
@@ -3185,6 +3206,60 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str)
 		 fw_rev_sub(dev->mdev));
 }
 
+static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+	struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
+					       lag_events);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_ib_port *port;
+	struct net_device *ndev;
+	int i, err;
+	int portnum;
+
+	portnum = 0;
+	switch (event) {
+	case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
+		ndev = data;
+		if (ndev) {
+			if (!mlx5_lag_is_roce(mdev)) {
+				/* sriov lag */
+				for (i = 0; i < dev->num_ports; i++) {
+					port = &dev->port[i];
+					if (port->rep && port->rep->vport ==
+					    MLX5_VPORT_UPLINK) {
+						portnum = i;
+						break;
+					}
+				}
+			}
+			err = ib_device_set_netdev(&dev->ib_dev, ndev,
+						   portnum + 1);
+			dev_put(ndev);
+			if (err)
+				return err;
+			/* Rescan gids after new netdev assignment */
+			rdma_roce_rescan_device(&dev->ib_dev);
+		}
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+	return NOTIFY_OK;
+}
+
+static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
+{
+	dev->lag_events.notifier_call = lag_event;
+	blocking_notifier_chain_register(&dev->mdev->priv.lag_nh,
+					 &dev->lag_events);
+}
+
+static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev)
+{
+	blocking_notifier_chain_unregister(&dev->mdev->priv.lag_nh,
+					   &dev->lag_events);
+}
+
 static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
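
The lag_event() handler added above consumes MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE from the mdev->priv.lag_nh blocking notifier chain; the producer side lives in mlx5 core, outside this diff. The sketch below is only an illustration of the generic blocking-notifier pattern it plugs into: the helper name is hypothetical, and the "pass the netdev with a reference held" convention is inferred from the dev_put(ndev) in lag_event(), not taken from this patch.

/*
 * Hypothetical producer-side sketch: deliver the new lower netdev on a
 * blocking notifier chain. The consumer (lag_event() above) releases
 * the reference with dev_put(), so it is passed held here.
 */
static void sketch_notify_lowerstate(struct blocking_notifier_head *nh,
				     struct net_device *new_lower)
{
	if (new_lower)
		dev_hold(new_lower);	/* dropped by the notifier consumer */
	blocking_notifier_call_chain(nh,
			MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
			new_lower);
}
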
@@ -3206,6 +3281,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 		goto err_destroy_vport_lag;
 	}
 
+	mlx5e_lag_event_register(dev);
 	dev->flow_db->lag_demux_ft = ft;
 	dev->lag_ports = mlx5_lag_get_num_ports(mdev);
 	dev->lag_active = true;
@@ -3223,6 +3299,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 	if (dev->lag_active) {
 		dev->lag_active = false;
 
+		mlx5e_lag_event_unregister(dev);
 		mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
 		dev->flow_db->lag_demux_ft = NULL;
@@ -3939,7 +4016,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 
 	for (i = 0; i < dev->num_ports; i++) {
 		spin_lock_init(&dev->port[i].mp.mpi_lock);
-		rwlock_init(&dev->port[i].roce.netdev_lock);
 		dev->port[i].roce.dev = dev;
 		dev->port[i].roce.native_port_num = i + 1;
 		dev->port[i].roce.last_port_state = IB_PORT_DOWN;
@@ -4204,7 +4280,6 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
 	.create_wq = mlx5_ib_create_wq,
 	.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
 	.destroy_wq = mlx5_ib_destroy_wq,
-	.get_netdev = mlx5_ib_get_netdev,
 	.modify_wq = mlx5_ib_modify_wq,
 
 	INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,