@@ -40,6 +40,7 @@
 #include <linux/netdevice.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
+#include <linux/hashtable.h>
 #include <rdma/rdma_netlink.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
@@ -134,6 +135,10 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
	     !xa_is_err(entry);                                                \
	     (index)++, entry = xan_find_marked(xa, &(index), filter))
 
+/* RCU hash table mapping netdevice pointers to struct ib_port_data */
+static DEFINE_SPINLOCK(ndev_hash_lock);
+static DECLARE_HASHTABLE(ndev_hash, 5);
+
 static void free_netdevs(struct ib_device *ib_dev);
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
			       void *lsm_data);
@@ -144,6 +149,12 @@ static struct notifier_block ibdev_lsm_nb = {
	.notifier_call = ib_security_change,
 };
 
+/* Pointer to the RCU head at the start of the ib_port_data array */
+struct ib_port_data_rcu {
+	struct rcu_head rcu_head;
+	struct ib_port_data pdata[];
+};
+
 static int ib_device_check_mandatory(struct ib_device *device)
 {
 #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
@@ -295,9 +306,12 @@ static void ib_device_release(struct device *device)
	WARN_ON(refcount_read(&dev->refcount));
	ib_cache_release_one(dev);
	ib_security_release_port_pkey_list(dev);
-	kfree(dev->port_data);
	xa_destroy(&dev->client_data);
-	kfree(dev);
+	if (dev->port_data)
+		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
+				       pdata[0]),
+			  rcu_head);
+	kfree_rcu(dev, rcu_head);
 }
 
 static int ib_device_uevent(struct device *device,
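
For context: `dev->port_data` points into the middle of an allocation (the `struct ib_port_data_rcu` wrapper introduced above), so the release path has to step back to the wrapper with `container_of()` before it can `kfree_rcu()` the whole block. A minimal sketch of the free side of that pattern, with illustrative names (`struct foo`, `free_foo`) that are not part of the patch; the allocation side is sketched after the `alloc_port_data()` hunk below:

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct foo {
	int val;
};

/* The rcu_head sits in front of the array that users actually see */
struct foo_rcu {
	struct rcu_head rcu_head;
	struct foo items[];
};

static void free_foo(struct foo *items)
{
	/*
	 * items points at foo_rcu::items, so step back to the enclosing
	 * wrapper and let RCU free the whole block after a grace period.
	 */
	kfree_rcu(container_of(items, struct foo_rcu, items[0]), rcu_head);
}

Hiding the rcu_head this way means none of the existing `port_data` users need to change; only the alloc and free sites know the wrapper exists.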
@@ -468,6 +482,7 @@ static void remove_client_context(struct ib_device *device,
 
 static int alloc_port_data(struct ib_device *device)
 {
+	struct ib_port_data_rcu *pdata_rcu;
	unsigned int port;
 
	if (device->port_data)
@@ -484,17 +499,26 @@ static int alloc_port_data(struct ib_device *device)
	 * Therefore port_data is declared as a 1 based array with potential
	 * empty slots at the beginning.
	 */
-	device->port_data = kcalloc(rdma_end_port(device) + 1,
-				    sizeof(*device->port_data), GFP_KERNEL);
-	if (!device->port_data)
+	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
+					rdma_end_port(device) + 1),
+			    GFP_KERNEL);
+	if (!pdata_rcu)
		return -ENOMEM;
+	/*
+	 * The rcu_head is put in front of the port data array and the stored
+	 * pointer is adjusted since we never need to see that member until
+	 * kfree_rcu.
+	 */
+	device->port_data = pdata_rcu->pdata;
 
	rdma_for_each_port (device, port) {
		struct ib_port_data *pdata = &device->port_data[port];
 
+		pdata->ib_dev = device;
		spin_lock_init(&pdata->pkey_list_lock);
		INIT_LIST_HEAD(&pdata->pkey_list);
		spin_lock_init(&pdata->netdev_lock);
+		INIT_HLIST_NODE(&pdata->ndev_hash_link);
	}
	return 0;
 }
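
The allocation side, continuing the hypothetical `struct foo` sketch from above. `struct_size(wrapper, items, n)` evaluates to `sizeof(*wrapper) + n * sizeof(wrapper->items[0])` but saturates at `SIZE_MAX` on arithmetic overflow, so an absurd element count makes `kzalloc()` fail cleanly rather than under-allocate:

#include <linux/overflow.h>
#include <linux/slab.h>

static struct foo *alloc_foo(unsigned int nelems)
{
	struct foo_rcu *wrapper;

	/* saturates to SIZE_MAX on overflow, so kzalloc() simply fails */
	wrapper = kzalloc(struct_size(wrapper, items, nelems), GFP_KERNEL);
	if (!wrapper)
		return NULL;

	/* hand out the array; the rcu_head stays invisible until free time */
	return wrapper->items;
}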
@@ -1042,6 +1066,29 @@ int ib_query_port(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_query_port);
 
+static void add_ndev_hash(struct ib_port_data *pdata)
+{
+	unsigned long flags;
+
+	might_sleep();
+
+	spin_lock_irqsave(&ndev_hash_lock, flags);
+	if (hash_hashed(&pdata->ndev_hash_link)) {
+		hash_del_rcu(&pdata->ndev_hash_link);
+		spin_unlock_irqrestore(&ndev_hash_lock, flags);
+		/*
+		 * We cannot do hash_add_rcu after a hash_del_rcu until the
+		 * grace period
+		 */
+		synchronize_rcu();
+		spin_lock_irqsave(&ndev_hash_lock, flags);
+	}
+	if (pdata->netdev)
+		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
+			     (uintptr_t)pdata->netdev);
+	spin_unlock_irqrestore(&ndev_hash_lock, flags);
+}
+
 /**
  * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
  * @ib_dev: Device to modify
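
Two details worth calling out in `add_ndev_hash()`. First, `might_sleep()` is there because the rehash path calls `synchronize_rcu()`, which blocks; the annotation warns (under `CONFIG_DEBUG_ATOMIC_SLEEP`) if the function is ever entered from atomic context, even when the sleeping branch is not taken. Second, a node unlinked with `hash_del_rcu()` may still be traversed by readers on its old chain, so it cannot be re-added until a grace period passes. The same discipline applies to any RCU hashtable; a hedged sketch with a stand-in table (`demo_table` and `demo_rehash` are illustrative, not from the patch):

#include <linux/hashtable.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static DECLARE_HASHTABLE(demo_table, 5);

/* Move a node to the bucket for new_key, honouring the grace period */
static void demo_rehash(struct hlist_node *node, unsigned long new_key)
{
	spin_lock(&demo_lock);
	if (hash_hashed(node)) {
		hash_del_rcu(node);
		spin_unlock(&demo_lock);
		/*
		 * hash_del_rcu() leaves the node's forward pointer intact so
		 * concurrent readers can finish walking the old chain; wait
		 * them out before the node joins a new chain.
		 */
		synchronize_rcu();
		spin_lock(&demo_lock);
	}
	hash_add_rcu(demo_table, node, new_key);
	spin_unlock(&demo_lock);
}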
@@ -1078,17 +1125,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 
	pdata = &ib_dev->port_data[port];
	spin_lock_irqsave(&pdata->netdev_lock, flags);
-	if (pdata->netdev == ndev) {
+	old_ndev = rcu_dereference_protected(
+		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
+	if (old_ndev == ndev) {
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
		return 0;
	}
-	old_ndev = pdata->netdev;
 
	if (ndev)
		dev_hold(ndev);
-	pdata->netdev = ndev;
+	rcu_assign_pointer(pdata->netdev, ndev);
	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 
+	add_ndev_hash(pdata);
	if (old_ndev)
		dev_put(old_ndev);
 
@@ -1103,11 +1152,24 @@ static void free_netdevs(struct ib_device *ib_dev)
 
	rdma_for_each_port (ib_dev, port) {
		struct ib_port_data *pdata = &ib_dev->port_data[port];
+		struct net_device *ndev;
 
		spin_lock_irqsave(&pdata->netdev_lock, flags);
-		if (pdata->netdev) {
-			dev_put(pdata->netdev);
-			pdata->netdev = NULL;
+		ndev = rcu_dereference_protected(
+			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
+		if (ndev) {
+			spin_lock(&ndev_hash_lock);
+			hash_del_rcu(&pdata->ndev_hash_link);
+			spin_unlock(&ndev_hash_lock);
+
+			/*
+			 * If this is the last dev_put there is still a
+			 * synchronize_rcu before the netdev is kfreed, so we
+			 * can continue to rely on unlocked pointer
+			 * comparisons after the put
+			 */
+			rcu_assign_pointer(pdata->netdev, NULL);
+			dev_put(ndev);
		}
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
	}
@@ -1132,7 +1194,8 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
		res = ib_dev->ops.get_netdev(ib_dev, port);
	else {
		spin_lock(&pdata->netdev_lock);
-		res = pdata->netdev;
+		res = rcu_dereference_protected(
+			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
		if (res)
			dev_hold(res);
		spin_unlock(&pdata->netdev_lock);
@@ -1150,6 +1213,38 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
	return res;
 }
 
+/**
+ * ib_device_get_by_netdev - Find an IB device associated with a netdev
+ * @ndev: netdev to locate
+ * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
+ *
+ * Find and hold an ib_device that is associated with a netdev via
+ * ib_device_set_netdev(). The caller must call ib_device_put() on the
+ * returned pointer.
+ */
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+					  enum rdma_driver_id driver_id)
+{
+	struct ib_device *res = NULL;
+	struct ib_port_data *cur;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
+				    (uintptr_t)ndev) {
+		if (rcu_access_pointer(cur->netdev) == ndev &&
+		    (driver_id == RDMA_DRIVER_UNKNOWN ||
+		     cur->ib_dev->driver_id == driver_id) &&
+		    ib_device_try_get(cur->ib_dev)) {
+			res = cur->ib_dev;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return res;
+}
+EXPORT_SYMBOL(ib_device_get_by_netdev);
+
 /**
  * ib_enum_roce_netdev - enumerate all RoCE ports
  * @ib_dev: IB device we want to query
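
A usage sketch for the new export (hypothetical caller, not from the patch), e.g. resolving the RDMA device behind a netdevice event. The lookup returns with a registration reference held via `ib_device_try_get()`, which the caller must drop with `ib_device_put()`:

#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>

static void demo_handle_netdev_event(struct net_device *ndev)
{
	struct ib_device *ibdev;

	ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
	if (!ibdev)
		return;	/* no RDMA device is bound to this netdev */

	pr_info("netdev %s backs RDMA device %s\n", ndev->name,
		dev_name(&ibdev->dev));

	/* pairs with the ib_device_try_get() inside the lookup */
	ib_device_put(ibdev);
}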