Skip to content

Commit 64f3807

Browse files
Kirill Tkhai authored and Nagarathnam Muthusamy committed
net: Introduce net_sem for protection of pernet_list
Currently, net_mutex is mostly used to protect the pernet operations list. It orders setup_net() and cleanup_net() with parallel {un,}register_pernet_operations() calls, so that the ->exit{,batch} methods executed for a dying net belong to the same pernet operations whose ->init methods were called for it, even after the net namespace is unlinked from net_namespace_list in cleanup_net().

But there are several problems with scalability. The first one is that more than one net can't be created or destroyed at the same moment on the node. For big machines with many CPUs running many containers this is very noticeable. The second one is that synchronize_rcu() has to be called after a net is removed from net_namespace_list:

Destroy net_ns:
cleanup_net()
  mutex_lock(&net_mutex)
  list_del_rcu(&net->list)
  synchronize_rcu()                                  <--- Sleep there for ages
  list_for_each_entry_reverse(ops, &pernet_list, list)
    ops_exit_list(ops, &net_exit_list)
  list_for_each_entry_reverse(ops, &pernet_list, list)
    ops_free_list(ops, &net_exit_list)
  mutex_unlock(&net_mutex)

This primitive is not fast, especially on systems with many processors and/or when preemptible RCU is enabled in the config. So, for the whole time that cleanup_net() is waiting for the RCU grace period, creation of new net namespaces is not possible; the tasks trying to create one sleep on the same mutex:

Create net_ns:
copy_net_ns()
  mutex_lock_killable(&net_mutex)                    <--- Sleep there for ages

I observed 20-30 second hangs of "unshare -n" on an ordinary 8-cpu laptop with preemptible RCU enabled after a CRIU tests round is finished.

The solution is to convert net_mutex to an rw_semaphore and add fine-grained locks to the really small number of pernet_operations that actually need them. Then pernet_operations::init/::exit methods, which modify the net-related data, will require down_read() locking only, while down_write() will be used for changing pernet_list (i.e., when modules are being loaded and unloaded).
This gives a significant performance increase once the whole patch set is applied, as you can see here:

%for i in {1..10000}; do unshare -n bash -c exit; done

*before*
real 1m40,377s
user 0m9,672s
sys 0m19,928s

*after*
real 0m17,007s
user 0m5,311s
sys 0m11,779s

(5.8 times faster)

This patch starts replacing net_mutex with net_sem. It adds the rw_semaphore, describes the variables it protects, and uses it where appropriate. net_mutex is still present, and the next patches will kick it out step by step.

Signed-off-by: Kirill Tkhai <[email protected]>
Acked-by: Andrei Vagin <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
(cherry picked from commit 1a57feb)
Orabug: 28900385
Signed-off-by: Nagarathnam Muthusamy <[email protected]>
Reviewed-by: Darren Kenny <[email protected]>
1 parent 7faad04 commit 64f3807

File tree

3 files changed

+29
-15
lines changed

3 files changed

+29
-15
lines changed

include/linux/rtnetlink.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extern int rtnl_is_locked(void);
3333

3434
extern wait_queue_head_t netdev_unregistering_wq;
3535
extern struct mutex net_mutex;
36+
extern struct rw_semaphore net_sem;
3637

3738
#ifdef CONFIG_PROVE_LOCKING
3839
extern bool lockdep_rtnl_is_held(void);

net/core/net_namespace.c

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ struct net init_net = {
4141
EXPORT_SYMBOL(init_net);
4242

4343
static bool init_net_initialized;
44+
/*
45+
* net_sem: protects: pernet_list, net_generic_ids,
46+
* init_net_initialized and first_device pointer.
47+
*/
48+
DECLARE_RWSEM(net_sem);
4449

4550
#define MIN_PERNET_OPS_ID \
4651
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -278,7 +283,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
278283
*/
279284
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
280285
{
281-
/* Must be called with net_mutex held */
286+
/* Must be called with net_sem held */
282287
const struct pernet_operations *ops, *saved_ops;
283288
int error = 0;
284289
LIST_HEAD(net_exit_list);
@@ -410,12 +415,16 @@ struct net *copy_net_ns(unsigned long flags,
410415
net->ucounts = ucounts;
411416
get_user_ns(user_ns);
412417

413-
rv = mutex_lock_killable(&net_mutex);
418+
rv = down_read_killable(&net_sem);
414419
if (rv < 0)
415420
goto put_userns;
416-
421+
rv = mutex_lock_killable(&net_mutex);
422+
if (rv < 0)
423+
goto up_read;
417424
rv = setup_net(net, user_ns);
418425
mutex_unlock(&net_mutex);
426+
up_read:
427+
up_read(&net_sem);
419428
if (rv < 0) {
420429
put_userns:
421430
put_user_ns(user_ns);
@@ -442,6 +451,7 @@ static void cleanup_net(struct work_struct *work)
442451
list_replace_init(&cleanup_list, &net_kill_list);
443452
spin_unlock_irq(&cleanup_list_lock);
444453

454+
down_read(&net_sem);
445455
mutex_lock(&net_mutex);
446456

447457
/* Don't let anyone else find us. */
@@ -483,6 +493,7 @@ static void cleanup_net(struct work_struct *work)
483493
ops_free_list(ops, &net_exit_list);
484494

485495
mutex_unlock(&net_mutex);
496+
up_read(&net_sem);
486497

487498
/* Ensure there are no outstanding rcu callbacks using this
488499
* network namespace.
@@ -509,8 +520,10 @@ static void cleanup_net(struct work_struct *work)
509520
*/
510521
void net_ns_barrier(void)
511522
{
523+
down_write(&net_sem);
512524
mutex_lock(&net_mutex);
513525
mutex_unlock(&net_mutex);
526+
up_write(&net_sem);
514527
}
515528
EXPORT_SYMBOL(net_ns_barrier);
516529

@@ -837,12 +850,12 @@ static int __init net_ns_init(void)
837850

838851
rcu_assign_pointer(init_net.gen, ng);
839852

840-
mutex_lock(&net_mutex);
853+
down_write(&net_sem);
841854
if (setup_net(&init_net, &init_user_ns))
842855
panic("Could not setup the initial network namespace");
843856

844857
init_net_initialized = true;
845-
mutex_unlock(&net_mutex);
858+
up_write(&net_sem);
846859

847860
register_pernet_subsys(&net_ns_ops);
848861

@@ -982,9 +995,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
982995
int register_pernet_subsys(struct pernet_operations *ops)
983996
{
984997
int error;
985-
mutex_lock(&net_mutex);
998+
down_write(&net_sem);
986999
error = register_pernet_operations(first_device, ops);
987-
mutex_unlock(&net_mutex);
1000+
up_write(&net_sem);
9881001
return error;
9891002
}
9901003
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1000,9 +1013,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
10001013
*/
10011014
void unregister_pernet_subsys(struct pernet_operations *ops)
10021015
{
1003-
mutex_lock(&net_mutex);
1016+
down_write(&net_sem);
10041017
unregister_pernet_operations(ops);
1005-
mutex_unlock(&net_mutex);
1018+
up_write(&net_sem);
10061019
}
10071020
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
10081021

@@ -1028,11 +1041,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
10281041
int register_pernet_device(struct pernet_operations *ops)
10291042
{
10301043
int error;
1031-
mutex_lock(&net_mutex);
1044+
down_write(&net_sem);
10321045
error = register_pernet_operations(&pernet_list, ops);
10331046
if (!error && (first_device == &pernet_list))
10341047
first_device = &ops->list;
1035-
mutex_unlock(&net_mutex);
1048+
up_write(&net_sem);
10361049
return error;
10371050
}
10381051
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1048,11 +1061,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
10481061
*/
10491062
void unregister_pernet_device(struct pernet_operations *ops)
10501063
{
1051-
mutex_lock(&net_mutex);
1064+
down_write(&net_sem);
10521065
if (&ops->list == first_device)
10531066
first_device = first_device->next;
10541067
unregister_pernet_operations(ops);
1055-
mutex_unlock(&net_mutex);
1068+
up_write(&net_sem);
10561069
}
10571070
EXPORT_SYMBOL_GPL(unregister_pernet_device);
10581071

net/core/rtnetlink.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,11 +390,11 @@ static void rtnl_lock_unregistering_all(void)
390390
void rtnl_link_unregister(struct rtnl_link_ops *ops)
391391
{
392392
/* Close the race with cleanup_net() */
393-
mutex_lock(&net_mutex);
393+
down_write(&net_sem);
394394
rtnl_lock_unregistering_all();
395395
__rtnl_link_unregister(ops);
396396
rtnl_unlock();
397-
mutex_unlock(&net_mutex);
397+
up_write(&net_sem);
398398
}
399399
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
400400

0 commit comments

Comments
 (0)