Skip to content

Commit 2121c43

Browse files
author
Paolo Abeni
committed
Merge branch 'inet_diag-remove-three-mutexes-in-diag-dumps'
Eric Dumazet says: ==================== inet_diag: remove three mutexes in diag dumps Surprisingly, inet_diag operations are serialized over a stack of three mutexes, giving legacy /proc-based files an unfair advantage on modern hosts. This series removes all of them, making inet_diag operations (e.g. iproute2/ss) fully parallel. 1-2) The first two patches add data-race annotations and can be backported to stable kernels. 3-4) inet_diag_table_mutex can be replaced with RCU protection, if we add corresponding protection against module unload. 5-7) sock_diag_table_mutex can be replaced with RCU protection, if we add corresponding protection against module unload. 8) sock_diag_mutex is removed, as the old bug it was working around has been fixed more elegantly. 9) inet_diag_dump_icsk() can skip over empty buckets to reduce spinlock contention. ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Paolo Abeni <[email protected]>
2 parents 736b554 + 622a08e commit 2121c43

File tree

17 files changed

+149
-97
lines changed

17 files changed

+149
-97
lines changed

include/linux/inet_diag.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
struct inet_hashinfo;
99

1010
struct inet_diag_handler {
11+
struct module *owner;
1112
void (*dump)(struct sk_buff *skb,
1213
struct netlink_callback *cb,
1314
const struct inet_diag_req_v2 *r);

include/linux/sock_diag.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ struct nlmsghdr;
1313
struct sock;
1414

1515
struct sock_diag_handler {
16+
struct module *owner;
1617
__u8 family;
1718
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
1819
int (*get_info)(struct sk_buff *skb, struct sock *sk);
@@ -22,8 +23,13 @@ struct sock_diag_handler {
2223
int sock_diag_register(const struct sock_diag_handler *h);
2324
void sock_diag_unregister(const struct sock_diag_handler *h);
2425

25-
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
26-
void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
26+
struct sock_diag_inet_compat {
27+
struct module *owner;
28+
int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh);
29+
};
30+
31+
void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr);
32+
void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr);
2733

2834
u64 __sock_gen_cookie(struct sock *sk);
2935

net/core/sock_diag.c

Lines changed: 68 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
#include <linux/inet_diag.h>
1717
#include <linux/sock_diag.h>
1818

19-
static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
20-
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
21-
static DEFINE_MUTEX(sock_diag_table_mutex);
19+
static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX];
20+
21+
static struct sock_diag_inet_compat __rcu *inet_rcv_compat;
22+
2223
static struct workqueue_struct *broadcast_wq;
2324

2425
DEFINE_COOKIE(sock_cookie);
@@ -122,6 +123,24 @@ static size_t sock_diag_nlmsg_size(void)
122123
+ nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
123124
}
124125

126+
static const struct sock_diag_handler *sock_diag_lock_handler(int family)
127+
{
128+
const struct sock_diag_handler *handler;
129+
130+
rcu_read_lock();
131+
handler = rcu_dereference(sock_diag_handlers[family]);
132+
if (handler && !try_module_get(handler->owner))
133+
handler = NULL;
134+
rcu_read_unlock();
135+
136+
return handler;
137+
}
138+
139+
static void sock_diag_unlock_handler(const struct sock_diag_handler *handler)
140+
{
141+
module_put(handler->owner);
142+
}
143+
125144
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
126145
{
127146
struct broadcast_sk *bsk =
@@ -138,12 +157,12 @@ static void sock_diag_broadcast_destroy_work(struct work_struct *work)
138157
if (!skb)
139158
goto out;
140159

141-
mutex_lock(&sock_diag_table_mutex);
142-
hndl = sock_diag_handlers[sk->sk_family];
143-
if (hndl && hndl->get_info)
144-
err = hndl->get_info(skb, sk);
145-
mutex_unlock(&sock_diag_table_mutex);
146-
160+
hndl = sock_diag_lock_handler(sk->sk_family);
161+
if (hndl) {
162+
if (hndl->get_info)
163+
err = hndl->get_info(skb, sk);
164+
sock_diag_unlock_handler(hndl);
165+
}
147166
if (!err)
148167
nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
149168
GFP_KERNEL);
@@ -166,51 +185,45 @@ void sock_diag_broadcast_destroy(struct sock *sk)
166185
queue_work(broadcast_wq, &bsk->work);
167186
}
168187

169-
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
188+
void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr)
170189
{
171-
mutex_lock(&sock_diag_table_mutex);
172-
inet_rcv_compat = fn;
173-
mutex_unlock(&sock_diag_table_mutex);
190+
xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
191+
ptr);
174192
}
175193
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
176194

177-
void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
195+
void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr)
178196
{
179-
mutex_lock(&sock_diag_table_mutex);
180-
inet_rcv_compat = NULL;
181-
mutex_unlock(&sock_diag_table_mutex);
197+
const struct sock_diag_inet_compat *old;
198+
199+
old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
200+
NULL);
201+
WARN_ON_ONCE(old != ptr);
182202
}
183203
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
184204

185205
int sock_diag_register(const struct sock_diag_handler *hndl)
186206
{
187-
int err = 0;
207+
int family = hndl->family;
188208

189-
if (hndl->family >= AF_MAX)
209+
if (family >= AF_MAX)
190210
return -EINVAL;
191211

192-
mutex_lock(&sock_diag_table_mutex);
193-
if (sock_diag_handlers[hndl->family])
194-
err = -EBUSY;
195-
else
196-
sock_diag_handlers[hndl->family] = hndl;
197-
mutex_unlock(&sock_diag_table_mutex);
198-
199-
return err;
212+
return !cmpxchg((const struct sock_diag_handler **)
213+
&sock_diag_handlers[family],
214+
NULL, hndl) ? 0 : -EBUSY;
200215
}
201216
EXPORT_SYMBOL_GPL(sock_diag_register);
202217

203-
void sock_diag_unregister(const struct sock_diag_handler *hnld)
218+
void sock_diag_unregister(const struct sock_diag_handler *hndl)
204219
{
205-
int family = hnld->family;
220+
int family = hndl->family;
206221

207222
if (family >= AF_MAX)
208223
return;
209224

210-
mutex_lock(&sock_diag_table_mutex);
211-
BUG_ON(sock_diag_handlers[family] != hnld);
212-
sock_diag_handlers[family] = NULL;
213-
mutex_unlock(&sock_diag_table_mutex);
225+
xchg((const struct sock_diag_handler **)&sock_diag_handlers[family],
226+
NULL);
214227
}
215228
EXPORT_SYMBOL_GPL(sock_diag_unregister);
216229

@@ -227,41 +240,48 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
227240
return -EINVAL;
228241
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);
229242

230-
if (sock_diag_handlers[req->sdiag_family] == NULL)
243+
if (!rcu_access_pointer(sock_diag_handlers[req->sdiag_family]))
231244
sock_load_diag_module(req->sdiag_family, 0);
232245

233-
mutex_lock(&sock_diag_table_mutex);
234-
hndl = sock_diag_handlers[req->sdiag_family];
246+
hndl = sock_diag_lock_handler(req->sdiag_family);
235247
if (hndl == NULL)
236-
err = -ENOENT;
237-
else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
248+
return -ENOENT;
249+
250+
if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
238251
err = hndl->dump(skb, nlh);
239252
else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
240253
err = hndl->destroy(skb, nlh);
241254
else
242255
err = -EOPNOTSUPP;
243-
mutex_unlock(&sock_diag_table_mutex);
256+
sock_diag_unlock_handler(hndl);
244257

245258
return err;
246259
}
247260

248261
static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
249262
struct netlink_ext_ack *extack)
250263
{
264+
const struct sock_diag_inet_compat *ptr;
251265
int ret;
252266

253267
switch (nlh->nlmsg_type) {
254268
case TCPDIAG_GETSOCK:
255269
case DCCPDIAG_GETSOCK:
256-
if (inet_rcv_compat == NULL)
270+
271+
if (!rcu_access_pointer(inet_rcv_compat))
257272
sock_load_diag_module(AF_INET, 0);
258273

259-
mutex_lock(&sock_diag_table_mutex);
260-
if (inet_rcv_compat != NULL)
261-
ret = inet_rcv_compat(skb, nlh);
262-
else
263-
ret = -EOPNOTSUPP;
264-
mutex_unlock(&sock_diag_table_mutex);
274+
rcu_read_lock();
275+
ptr = rcu_dereference(inet_rcv_compat);
276+
if (ptr && !try_module_get(ptr->owner))
277+
ptr = NULL;
278+
rcu_read_unlock();
279+
280+
ret = -EOPNOTSUPP;
281+
if (ptr) {
282+
ret = ptr->fn(skb, nlh);
283+
module_put(ptr->owner);
284+
}
265285

266286
return ret;
267287
case SOCK_DIAG_BY_FAMILY:
@@ -272,26 +292,22 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
272292
}
273293
}
274294

275-
static DEFINE_MUTEX(sock_diag_mutex);
276-
277295
static void sock_diag_rcv(struct sk_buff *skb)
278296
{
279-
mutex_lock(&sock_diag_mutex);
280297
netlink_rcv_skb(skb, &sock_diag_rcv_msg);
281-
mutex_unlock(&sock_diag_mutex);
282298
}
283299

284300
static int sock_diag_bind(struct net *net, int group)
285301
{
286302
switch (group) {
287303
case SKNLGRP_INET_TCP_DESTROY:
288304
case SKNLGRP_INET_UDP_DESTROY:
289-
if (!sock_diag_handlers[AF_INET])
305+
if (!rcu_access_pointer(sock_diag_handlers[AF_INET]))
290306
sock_load_diag_module(AF_INET, 0);
291307
break;
292308
case SKNLGRP_INET6_TCP_DESTROY:
293309
case SKNLGRP_INET6_UDP_DESTROY:
294-
if (!sock_diag_handlers[AF_INET6])
310+
if (!rcu_access_pointer(sock_diag_handlers[AF_INET6]))
295311
sock_load_diag_module(AF_INET6, 0);
296312
break;
297313
}

net/dccp/diag.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ static int dccp_diag_dump_one(struct netlink_callback *cb,
5858
}
5959

6060
static const struct inet_diag_handler dccp_diag_handler = {
61+
.owner = THIS_MODULE,
6162
.dump = dccp_diag_dump,
6263
.dump_one = dccp_diag_dump_one,
6364
.idiag_get_info = dccp_diag_get_info,

0 commit comments

Comments
 (0)