Skip to content

Commit c2261dd

Browse files
committed
RDMA/device: Add ib_device_set_netdev() as an alternative to get_netdev
The associated netdev should not actually be very dynamic, so for most drivers there is no reason for a callback like this. Provide an API to inform the core code about the netdev affiliation and use a core-maintained data structure instead. This allows the core code to be more aware of the ndev relationship, which will allow some new APIs based around this. This also uses locking that makes some kind of sense; many drivers had a confusing RCU lock, or missing locking, which isn't right. Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 8faea9f commit c2261dd

File tree

6 files changed

+171
-38
lines changed

6 files changed

+171
-38
lines changed

drivers/infiniband/core/cache.c

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -547,21 +547,19 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
547547
unsigned long mask;
548548
int ret;
549549

550-
if (ib_dev->ops.get_netdev) {
551-
idev = ib_dev->ops.get_netdev(ib_dev, port);
552-
if (idev && attr->ndev != idev) {
553-
union ib_gid default_gid;
554-
555-
/* Adding default GIDs in not permitted */
556-
make_default_gid(idev, &default_gid);
557-
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
558-
dev_put(idev);
559-
return -EPERM;
560-
}
561-
}
562-
if (idev)
550+
idev = ib_device_get_netdev(ib_dev, port);
551+
if (idev && attr->ndev != idev) {
552+
union ib_gid default_gid;
553+
554+
/* Adding default GIDs is not permitted */
555+
make_default_gid(idev, &default_gid);
556+
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
563557
dev_put(idev);
558+
return -EPERM;
559+
}
564560
}
561+
if (idev)
562+
dev_put(idev);
565563

566564
mask = GID_ATTR_FIND_MASK_GID |
567565
GID_ATTR_FIND_MASK_GID_TYPE |

drivers/infiniband/core/core_priv.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
6666
typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
6767
struct net_device *idev, void *cookie);
6868

69+
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
70+
unsigned int port);
71+
6972
void ib_enum_roce_netdev(struct ib_device *ib_dev,
7073
roce_netdev_filter filter,
7174
void *filter_cookie,

drivers/infiniband/core/device.c

Lines changed: 148 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
134134
!xa_is_err(entry); \
135135
(index)++, entry = xan_find_marked(xa, &(index), filter))
136136

137+
static void free_netdevs(struct ib_device *ib_dev);
137138
static int ib_security_change(struct notifier_block *nb, unsigned long event,
138139
void *lsm_data);
139140
static void ib_policy_change_task(struct work_struct *work);
@@ -290,6 +291,7 @@ static void ib_device_release(struct device *device)
290291
{
291292
struct ib_device *dev = container_of(device, struct ib_device, dev);
292293

294+
free_netdevs(dev);
293295
WARN_ON(refcount_read(&dev->refcount));
294296
ib_cache_release_one(dev);
295297
ib_security_release_port_pkey_list(dev);
@@ -371,6 +373,9 @@ EXPORT_SYMBOL(_ib_alloc_device);
371373
*/
372374
void ib_dealloc_device(struct ib_device *device)
373375
{
376+
/* Expedite releasing netdev references */
377+
free_netdevs(device);
378+
374379
WARN_ON(!xa_empty(&device->client_data));
375380
WARN_ON(refcount_read(&device->refcount));
376381
rdma_restrack_clean(device);
@@ -461,16 +466,16 @@ static void remove_client_context(struct ib_device *device,
461466
up_read(&device->client_data_rwsem);
462467
}
463468

464-
static int verify_immutable(const struct ib_device *dev, u8 port)
465-
{
466-
return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
467-
rdma_max_mad_size(dev, port) != 0);
468-
}
469-
470-
static int setup_port_data(struct ib_device *device)
469+
static int alloc_port_data(struct ib_device *device)
471470
{
472471
unsigned int port;
473-
int ret;
472+
473+
if (device->port_data)
474+
return 0;
475+
476+
/* This can only be called once the physical port range is defined */
477+
if (WARN_ON(!device->phys_port_cnt))
478+
return -EINVAL;
474479

475480
/*
476481
* device->port_data is indexed directly by the port number to make
@@ -489,6 +494,28 @@ static int setup_port_data(struct ib_device *device)
489494

490495
spin_lock_init(&pdata->pkey_list_lock);
491496
INIT_LIST_HEAD(&pdata->pkey_list);
497+
spin_lock_init(&pdata->netdev_lock);
498+
}
499+
return 0;
500+
}
501+
502+
static int verify_immutable(const struct ib_device *dev, u8 port)
503+
{
504+
return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
505+
rdma_max_mad_size(dev, port) != 0);
506+
}
507+
508+
static int setup_port_data(struct ib_device *device)
509+
{
510+
unsigned int port;
511+
int ret;
512+
513+
ret = alloc_port_data(device);
514+
if (ret)
515+
return ret;
516+
517+
rdma_for_each_port (device, port) {
518+
struct ib_port_data *pdata = &device->port_data[port];
492519

493520
ret = device->ops.get_port_immutable(device, port,
494521
&pdata->immutable);
@@ -682,6 +709,9 @@ static void disable_device(struct ib_device *device)
682709
/* Pairs with refcount_set in enable_device */
683710
ib_device_put(device);
684711
wait_for_completion(&device->unreg_completion);
712+
713+
/* Expedite removing unregistered pointers from the hash table */
714+
free_netdevs(device);
685715
}
686716

687717
/*
@@ -1012,6 +1042,114 @@ int ib_query_port(struct ib_device *device,
10121042
}
10131043
EXPORT_SYMBOL(ib_query_port);
10141044

1045+
/**
1046+
* ib_device_set_netdev - Associate the ib_dev with an underlying net_device
1047+
* @ib_dev: Device to modify
1048+
* @ndev: net_device to affiliate, may be NULL
1049+
* @port: IB port the net_device is connected to
1050+
*
1051+
* Drivers should use this to link the ib_device to a netdev so the netdev
1052+
* shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
1053+
* affiliated with any port.
1054+
*
1055+
* The caller must ensure that the given ndev is not unregistered or
1056+
* unregistering, and that either the ib_device is unregistered or
1057+
* ib_device_set_netdev() is called with NULL when the ndev sends a
1058+
* NETDEV_UNREGISTER event.
1059+
*/
1060+
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
1061+
unsigned int port)
1062+
{
1063+
struct net_device *old_ndev;
1064+
struct ib_port_data *pdata;
1065+
unsigned long flags;
1066+
int ret;
1067+
1068+
/*
1069+
* Drivers wish to call this before ib_register_device, so we have to
1070+
* set up the port data early.
1071+
*/
1072+
ret = alloc_port_data(ib_dev);
1073+
if (ret)
1074+
return ret;
1075+
1076+
if (!rdma_is_port_valid(ib_dev, port))
1077+
return -EINVAL;
1078+
1079+
pdata = &ib_dev->port_data[port];
1080+
spin_lock_irqsave(&pdata->netdev_lock, flags);
1081+
if (pdata->netdev == ndev) {
1082+
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1083+
return 0;
1084+
}
1085+
old_ndev = pdata->netdev;
1086+
1087+
if (ndev)
1088+
dev_hold(ndev);
1089+
pdata->netdev = ndev;
1090+
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1091+
1092+
if (old_ndev)
1093+
dev_put(old_ndev);
1094+
1095+
return 0;
1096+
}
1097+
EXPORT_SYMBOL(ib_device_set_netdev);
1098+
1099+
static void free_netdevs(struct ib_device *ib_dev)
1100+
{
1101+
unsigned long flags;
1102+
unsigned int port;
1103+
1104+
rdma_for_each_port (ib_dev, port) {
1105+
struct ib_port_data *pdata = &ib_dev->port_data[port];
1106+
1107+
spin_lock_irqsave(&pdata->netdev_lock, flags);
1108+
if (pdata->netdev) {
1109+
dev_put(pdata->netdev);
1110+
pdata->netdev = NULL;
1111+
}
1112+
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1113+
}
1114+
}
1115+
1116+
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
1117+
unsigned int port)
1118+
{
1119+
struct ib_port_data *pdata;
1120+
struct net_device *res;
1121+
1122+
if (!rdma_is_port_valid(ib_dev, port))
1123+
return NULL;
1124+
1125+
pdata = &ib_dev->port_data[port];
1126+
1127+
/*
1128+
* New drivers should use ib_device_set_netdev() not the legacy
1129+
* get_netdev().
1130+
*/
1131+
if (ib_dev->ops.get_netdev)
1132+
res = ib_dev->ops.get_netdev(ib_dev, port);
1133+
else {
1134+
spin_lock(&pdata->netdev_lock);
1135+
res = pdata->netdev;
1136+
if (res)
1137+
dev_hold(res);
1138+
spin_unlock(&pdata->netdev_lock);
1139+
}
1140+
1141+
/*
1142+
* If we are starting to unregister, expedite things by preventing
1143+
* propagation of an unregistering netdev.
1144+
*/
1145+
if (res && res->reg_state != NETREG_REGISTERED) {
1146+
dev_put(res);
1147+
return NULL;
1148+
}
1149+
1150+
return res;
1151+
}
1152+
10151153
/**
10161154
* ib_enum_roce_netdev - enumerate all RoCE ports
10171155
* @ib_dev : IB device we want to query
@@ -1034,16 +1172,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
10341172

10351173
rdma_for_each_port (ib_dev, port)
10361174
if (rdma_protocol_roce(ib_dev, port)) {
1037-
struct net_device *idev = NULL;
1038-
1039-
if (ib_dev->ops.get_netdev)
1040-
idev = ib_dev->ops.get_netdev(ib_dev, port);
1041-
1042-
if (idev &&
1043-
idev->reg_state >= NETREG_UNREGISTERED) {
1044-
dev_put(idev);
1045-
idev = NULL;
1046-
}
1175+
struct net_device *idev =
1176+
ib_device_get_netdev(ib_dev, port);
10471177

10481178
if (filter(ib_dev, port, idev, filter_cookie))
10491179
cb(ib_dev, port, idev, cookie);

drivers/infiniband/core/nldev.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,9 +268,7 @@ static int fill_port_info(struct sk_buff *msg,
268268
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
269269
return -EMSGSIZE;
270270

271-
if (device->ops.get_netdev)
272-
netdev = device->ops.get_netdev(device, port);
273-
271+
netdev = ib_device_get_netdev(device, port);
274272
if (netdev && net_eq(dev_net(netdev), net)) {
275273
ret = nla_put_u32(msg,
276274
RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);

drivers/infiniband/core/verbs.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,10 +1723,7 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
17231723
if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
17241724
return -EINVAL;
17251725

1726-
if (!dev->ops.get_netdev)
1727-
return -EOPNOTSUPP;
1728-
1729-
netdev = dev->ops.get_netdev(dev, port_num);
1726+
netdev = ib_device_get_netdev(dev, port_num);
17301727
if (!netdev)
17311728
return -ENODEV;
17321729

include/rdma/ib_verbs.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2204,6 +2204,9 @@ struct ib_port_data {
22042204
struct list_head pkey_list;
22052205

22062206
struct ib_port_cache cache;
2207+
2208+
spinlock_t netdev_lock;
2209+
struct net_device *netdev;
22072210
};
22082211

22092212
/* rdma netdev type - specifies protocol type */
@@ -3996,6 +3999,10 @@ void ib_device_put(struct ib_device *device);
39963999
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
39974000
u16 pkey, const union ib_gid *gid,
39984001
const struct sockaddr *addr);
4002+
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
4003+
unsigned int port);
4004+
struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
4005+
39994006
struct ib_wq *ib_create_wq(struct ib_pd *pd,
40004007
struct ib_wq_init_attr *init_attr);
40014008
int ib_destroy_wq(struct ib_wq *wq);

0 commit comments

Comments
 (0)