Skip to content

Commit 7c754e6

Browse files
committed
Merge branch 'dev_base_lock-remove'
Eric Dumazet says:

====================
net: complete dev_base_lock removal

Back in 2009 we started an effort to get rid of dev_base_lock in favor of RCU.

It is time to finish this work.

Say goodbye to dev_base_lock !

v4: rebase, and move dev_addr_sem to net/core/dev.h in patch 06/13 (Jakub)

v3: I misread kbot reports, the issue was with dev->operstate (patch 10/13)
    So dev->reg_state is back to u8, and dev->operstate becomes an u32.
    Sorry for the noise.

v2: dev->reg_state must be a standard enum, some arches do not support cmpxchg() on u8.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents b7f9ef7 + 1b3ef46 commit 7c754e6

File tree

17 files changed

+112
-145
lines changed

17 files changed

+112
-145
lines changed

Documentation/networking/netdevices.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,8 @@ ndo_eth_ioctl:
252252
Context: process
253253

254254
ndo_get_stats:
255-
Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU.
256-
Context: atomic (can't sleep under rwlock or RCU)
255+
Synchronization: rtnl_lock() semaphore, or RCU.
256+
Context: atomic (can't sleep under RCU)
257257

258258
ndo_start_xmit:
259259
Synchronization: __netif_tx_lock spinlock.

drivers/net/ethernet/cisco/enic/enic_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -872,7 +872,7 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
872872
return NETDEV_TX_OK;
873873
}
874874

875-
/* dev_base_lock rwlock held, nominally process context */
875+
/* rcu_read_lock potentially held, nominally process context */
876876
static void enic_get_stats(struct net_device *netdev,
877877
struct rtnl_link_stats64 *net_stats)
878878
{

drivers/net/ethernet/nvidia/forcedeth.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1761,7 +1761,7 @@ static void nv_get_stats(int cpu, struct fe_priv *np,
17611761
/*
17621762
* nv_get_stats64: dev->ndo_get_stats64 function
17631763
* Get latest stats value from the nic.
1764-
* Called with read_lock(&dev_base_lock) held for read -
1764+
* Called with rcu_read_lock() held -
17651765
* only synchronized against unregister_netdevice.
17661766
*/
17671767
static void
@@ -3090,7 +3090,7 @@ static void set_bufsize(struct net_device *dev)
30903090

30913091
/*
30923092
* nv_change_mtu: dev->change_mtu function
3093-
* Called with dev_base_lock held for read.
3093+
* Called with RTNL held for read.
30943094
*/
30953095
static int nv_change_mtu(struct net_device *dev, int new_mtu)
30963096
{

drivers/net/ethernet/sfc/efx_common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ void efx_stop_all(struct efx_nic *efx)
595595
efx_stop_datapath(efx);
596596
}
597597

598-
/* Context: process, dev_base_lock or RTNL held, non-blocking. */
598+
/* Context: process, rcu_read_lock or RTNL held, non-blocking. */
599599
void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
600600
{
601601
struct efx_nic *efx = efx_netdev_priv(net_dev);

drivers/net/ethernet/sfc/falcon/efx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2085,7 +2085,7 @@ int ef4_net_stop(struct net_device *net_dev)
20852085
return 0;
20862086
}
20872087

2088-
/* Context: process, dev_base_lock or RTNL held, non-blocking. */
2088+
/* Context: process, rcu_read_lock or RTNL held, non-blocking. */
20892089
static void ef4_net_stats(struct net_device *net_dev,
20902090
struct rtnl_link_stats64 *stats)
20912091
{

drivers/net/ethernet/sfc/siena/efx_common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ static size_t efx_siena_update_stats_atomic(struct efx_nic *efx, u64 *full_stats
605605
return efx->type->update_stats(efx, full_stats, core_stats);
606606
}
607607

608-
/* Context: process, dev_base_lock or RTNL held, non-blocking. */
608+
/* Context: process, rcu_read_lock or RTNL held, non-blocking. */
609609
void efx_siena_net_stats(struct net_device *net_dev,
610610
struct rtnl_link_stats64 *stats)
611611
{

include/linux/netdevice.h

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,6 +1815,15 @@ enum netdev_stat_type {
18151815
NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
18161816
};
18171817

1818+
enum netdev_reg_state {
1819+
NETREG_UNINITIALIZED = 0,
1820+
NETREG_REGISTERED, /* completed register_netdevice */
1821+
NETREG_UNREGISTERING, /* called unregister_netdevice */
1822+
NETREG_UNREGISTERED, /* completed unregister todo */
1823+
NETREG_RELEASED, /* called free_netdev */
1824+
NETREG_DUMMY, /* dummy device for NAPI poll */
1825+
};
1826+
18181827
/**
18191828
* struct net_device - The DEVICE structure.
18201829
*
@@ -2249,7 +2258,7 @@ struct net_device {
22492258
const struct tlsdev_ops *tlsdev_ops;
22502259
#endif
22512260

2252-
unsigned char operstate;
2261+
unsigned int operstate;
22532262
unsigned char link_mode;
22542263

22552264
unsigned char if_port;
@@ -2372,13 +2381,7 @@ struct net_device {
23722381

23732382
struct list_head link_watch_list;
23742383

2375-
enum { NETREG_UNINITIALIZED=0,
2376-
NETREG_REGISTERED, /* completed register_netdevice */
2377-
NETREG_UNREGISTERING, /* called unregister_netdevice */
2378-
NETREG_UNREGISTERED, /* completed unregister todo */
2379-
NETREG_RELEASED, /* called free_netdev */
2380-
NETREG_DUMMY, /* dummy device for NAPI poll */
2381-
} reg_state:8;
2384+
u8 reg_state;
23822385

23832386
bool dismantle;
23842387

@@ -3074,8 +3077,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
30743077
int call_netdevice_notifiers_info(unsigned long val,
30753078
struct netdev_notifier_info *info);
30763079

3077-
extern rwlock_t dev_base_lock; /* Device list lock */
3078-
30793080
#define for_each_netdev(net, d) \
30803081
list_for_each_entry(d, &(net)->dev_base_head, dev_list)
30813082
#define for_each_netdev_reverse(net, d) \
@@ -5254,7 +5255,9 @@ static inline const char *netdev_name(const struct net_device *dev)
52545255

52555256
static inline const char *netdev_reg_state(const struct net_device *dev)
52565257
{
5257-
switch (dev->reg_state) {
5258+
u8 reg_state = READ_ONCE(dev->reg_state);
5259+
5260+
switch (reg_state) {
52585261
case NETREG_UNINITIALIZED: return " (uninitialized)";
52595262
case NETREG_REGISTERED: return "";
52605263
case NETREG_UNREGISTERING: return " (unregistering)";
@@ -5263,7 +5266,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
52635266
case NETREG_DUMMY: return " (dummy)";
52645267
}
52655268

5266-
WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state);
5269+
WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state);
52675270
return " (unknown)";
52685271
}
52695272

include/linux/rtnetlink.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group)
172172
return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group);
173173
}
174174

175+
void netdev_set_operstate(struct net_device *dev, int newstate);
176+
175177
#endif /* __LINUX_RTNETLINK_H */

net/bridge/br_netlink.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
455455
u32 filter_mask, const struct net_device *dev,
456456
bool getlink)
457457
{
458-
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
458+
u8 operstate = netif_running(dev) ? READ_ONCE(dev->operstate) :
459+
IF_OPER_DOWN;
459460
struct nlattr *af = NULL;
460461
struct net_bridge *br;
461462
struct ifinfomsg *hdr;

net/core/dev.c

Lines changed: 17 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -168,28 +168,6 @@ static int call_netdevice_notifiers_extack(unsigned long val,
168168
struct net_device *dev,
169169
struct netlink_ext_ack *extack);
170170

171-
/*
172-
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
173-
* semaphore.
174-
*
175-
* Pure readers hold dev_base_lock for reading, or rcu_read_lock()
176-
*
177-
* Writers must hold the rtnl semaphore while they loop through the
178-
* dev_base_head list, and hold dev_base_lock for writing when they do the
179-
* actual updates. This allows pure readers to access the list even
180-
* while a writer is preparing to update it.
181-
*
182-
* To put it another way, dev_base_lock is held for writing only to
183-
* protect against pure readers; the rtnl semaphore provides the
184-
* protection against other writers.
185-
*
186-
* See, for example usages, register_netdevice() and
187-
* unregister_netdevice(), which must be called with the rtnl
188-
* semaphore held.
189-
*/
190-
DEFINE_RWLOCK(dev_base_lock);
191-
EXPORT_SYMBOL(dev_base_lock);
192-
193171
static DEFINE_MUTEX(ifalias_mutex);
194172

195173
/* protects napi_hash addition/deletion and napi_gen_id */
@@ -395,12 +373,10 @@ static void list_netdevice(struct net_device *dev)
395373

396374
ASSERT_RTNL();
397375

398-
write_lock(&dev_base_lock);
399376
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
400377
netdev_name_node_add(net, dev->name_node);
401378
hlist_add_head_rcu(&dev->index_hlist,
402379
dev_index_hash(net, dev->ifindex));
403-
write_unlock(&dev_base_lock);
404380

405381
netdev_for_each_altname(dev, name_node)
406382
netdev_name_node_add(net, name_node);
@@ -414,7 +390,7 @@ static void list_netdevice(struct net_device *dev)
414390
/* Device list removal
415391
* caller must respect a RCU grace period before freeing/reusing dev
416392
*/
417-
static void unlist_netdevice(struct net_device *dev, bool lock)
393+
static void unlist_netdevice(struct net_device *dev)
418394
{
419395
struct netdev_name_node *name_node;
420396
struct net *net = dev_net(dev);
@@ -427,13 +403,9 @@ static void unlist_netdevice(struct net_device *dev, bool lock)
427403
netdev_name_node_del(name_node);
428404

429405
/* Unlink dev from the device chain */
430-
if (lock)
431-
write_lock(&dev_base_lock);
432406
list_del_rcu(&dev->dev_list);
433407
netdev_name_node_del(dev->name_node);
434408
hlist_del_rcu(&dev->index_hlist);
435-
if (lock)
436-
write_unlock(&dev_base_lock);
437409

438410
dev_base_seq_inc(dev_net(dev));
439411
}
@@ -754,9 +726,9 @@ EXPORT_SYMBOL_GPL(dev_fill_forward_path);
754726
* @net: the applicable net namespace
755727
* @name: name to find
756728
*
757-
* Find an interface by name. Must be called under RTNL semaphore
758-
* or @dev_base_lock. If the name is found a pointer to the device
759-
* is returned. If the name is not found then %NULL is returned. The
729+
* Find an interface by name. Must be called under RTNL semaphore.
730+
* If the name is found a pointer to the device is returned.
731+
* If the name is not found then %NULL is returned. The
760732
* reference counters are not incremented so the caller must be
761733
* careful with locks.
762734
*/
@@ -837,8 +809,7 @@ EXPORT_SYMBOL(netdev_get_by_name);
837809
* Search for an interface by index. Returns %NULL if the device
838810
* is not found or a pointer to the device. The device has not
839811
* had its reference counter increased so the caller must be careful
840-
* about locking. The caller must hold either the RTNL semaphore
841-
* or @dev_base_lock.
812+
* about locking. The caller must hold the RTNL semaphore.
842813
*/
843814

844815
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
@@ -1228,13 +1199,13 @@ int dev_change_name(struct net_device *dev, const char *newname)
12281199
dev->flags & IFF_UP ? " (while UP)" : "");
12291200

12301201
old_assign_type = dev->name_assign_type;
1231-
dev->name_assign_type = NET_NAME_RENAMED;
1202+
WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED);
12321203

12331204
rollback:
12341205
ret = device_rename(&dev->dev, dev->name);
12351206
if (ret) {
12361207
memcpy(dev->name, oldname, IFNAMSIZ);
1237-
dev->name_assign_type = old_assign_type;
1208+
WRITE_ONCE(dev->name_assign_type, old_assign_type);
12381209
up_write(&devnet_rename_sem);
12391210
return ret;
12401211
}
@@ -1243,15 +1214,11 @@ int dev_change_name(struct net_device *dev, const char *newname)
12431214

12441215
netdev_adjacent_rename_links(dev, oldname);
12451216

1246-
write_lock(&dev_base_lock);
12471217
netdev_name_node_del(dev->name_node);
1248-
write_unlock(&dev_base_lock);
12491218

12501219
synchronize_net();
12511220

1252-
write_lock(&dev_base_lock);
12531221
netdev_name_node_add(net, dev->name_node);
1254-
write_unlock(&dev_base_lock);
12551222

12561223
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
12571224
ret = notifier_to_errno(ret);
@@ -1263,7 +1230,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
12631230
down_write(&devnet_rename_sem);
12641231
memcpy(dev->name, oldname, IFNAMSIZ);
12651232
memcpy(oldname, newname, IFNAMSIZ);
1266-
dev->name_assign_type = old_assign_type;
1233+
WRITE_ONCE(dev->name_assign_type, old_assign_type);
12671234
old_assign_type = NET_NAME_RENAMED;
12681235
goto rollback;
12691236
} else {
@@ -8993,7 +8960,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
89938960
}
89948961
EXPORT_SYMBOL(dev_set_mac_address);
89958962

8996-
static DECLARE_RWSEM(dev_addr_sem);
8963+
DECLARE_RWSEM(dev_addr_sem);
89978964

89988965
int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
89998966
struct netlink_ext_ack *extack)
@@ -10338,9 +10305,9 @@ int register_netdevice(struct net_device *dev)
1033810305
goto err_ifindex_release;
1033910306

1034010307
ret = netdev_register_kobject(dev);
10341-
write_lock(&dev_base_lock);
10342-
dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
10343-
write_unlock(&dev_base_lock);
10308+
10309+
WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
10310+
1034410311
if (ret)
1034510312
goto err_uninit_notify;
1034610313

@@ -10629,9 +10596,7 @@ void netdev_run_todo(void)
1062910596
continue;
1063010597
}
1063110598

10632-
write_lock(&dev_base_lock);
10633-
dev->reg_state = NETREG_UNREGISTERED;
10634-
write_unlock(&dev_base_lock);
10599+
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
1063510600
linkwatch_sync_dev(dev);
1063610601
}
1063710602

@@ -11050,7 +11015,7 @@ void free_netdev(struct net_device *dev)
1105011015
}
1105111016

1105211017
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
11053-
dev->reg_state = NETREG_RELEASED;
11018+
WRITE_ONCE(dev->reg_state, NETREG_RELEASED);
1105411019

1105511020
/* will free via device release */
1105611021
put_device(&dev->dev);
@@ -11138,10 +11103,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
1113811103

1113911104
list_for_each_entry(dev, head, unreg_list) {
1114011105
/* And unlink it from device chain. */
11141-
write_lock(&dev_base_lock);
11142-
unlist_netdevice(dev, false);
11143-
dev->reg_state = NETREG_UNREGISTERING;
11144-
write_unlock(&dev_base_lock);
11106+
unlist_netdevice(dev);
11107+
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
1114511108
}
1114611109
flush_all_backlogs();
1114711110

@@ -11323,7 +11286,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
1132311286
dev_close(dev);
1132411287

1132511288
/* And unlink it from device chain */
11326-
unlist_netdevice(dev, true);
11289+
unlist_netdevice(dev);
1132711290

1132811291
synchronize_net();
1132911292

net/core/dev.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#define _NET_CORE_DEV_H
44

55
#include <linux/types.h>
6+
#include <linux/rwsem.h>
67

78
struct net;
89
struct net_device;
@@ -46,6 +47,8 @@ extern int weight_p;
4647
extern int dev_weight_rx_bias;
4748
extern int dev_weight_tx_bias;
4849

50+
extern struct rw_semaphore dev_addr_sem;
51+
4952
/* rtnl helpers */
5053
extern struct list_head net_todo_list;
5154
void netdev_run_todo(void);

net/core/link_watch.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
3333
static LIST_HEAD(lweventlist);
3434
static DEFINE_SPINLOCK(lweventlist_lock);
3535

36-
static unsigned char default_operstate(const struct net_device *dev)
36+
static unsigned int default_operstate(const struct net_device *dev)
3737
{
3838
if (netif_testing(dev))
3939
return IF_OPER_TESTING;
@@ -62,16 +62,13 @@ static unsigned char default_operstate(const struct net_device *dev)
6262
return IF_OPER_UP;
6363
}
6464

65-
6665
static void rfc2863_policy(struct net_device *dev)
6766
{
68-
unsigned char operstate = default_operstate(dev);
67+
unsigned int operstate = default_operstate(dev);
6968

70-
if (operstate == dev->operstate)
69+
if (operstate == READ_ONCE(dev->operstate))
7170
return;
7271

73-
write_lock(&dev_base_lock);
74-
7572
switch(dev->link_mode) {
7673
case IF_LINK_MODE_TESTING:
7774
if (operstate == IF_OPER_UP)
@@ -87,9 +84,7 @@ static void rfc2863_policy(struct net_device *dev)
8784
break;
8885
}
8986

90-
dev->operstate = operstate;
91-
92-
write_unlock(&dev_base_lock);
87+
WRITE_ONCE(dev->operstate, operstate);
9388
}
9489

9590

0 commit comments

Comments
 (0)