Skip to content

Commit fc13fd3

Browse files
committed
Merge branch 'udp-fwd-mem-sched-on-dequeue'
Paolo Abeni says: ==================== udp: do fwd memory scheduling on dequeue After commit 850cbad ("udp: use it's own memory accounting schema"), the udp code needs to acquire the receive queue spinlock twice on dequeue. This patch series removes the need for the second lock at skb free time, moving the udp memory scheduling inside the dequeue operation; the skb destructor field is not used anymore and an additional sk argument is added to ip_cmsg_recv_offset() to cope with null skb->sk after dequeue. Many thanks to Eric Dumazet for suggesting pretty much all of the above. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents cd2c0f4 + 7c13f97 commit fc13fd3

File tree

11 files changed

+71
-40
lines changed

11 files changed

+71
-40
lines changed

include/linux/skbuff.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
30333033
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
30343034
const struct sk_buff *skb);
30353035
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
3036+
void (*destructor)(struct sock *sk,
3037+
struct sk_buff *skb),
30363038
int *peeked, int *off, int *err,
30373039
struct sk_buff **last);
30383040
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
3041+
void (*destructor)(struct sock *sk,
3042+
struct sk_buff *skb),
30393043
int *peeked, int *off, int *err);
30403044
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
30413045
int *err);

include/net/ip.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,8 @@ int ip_options_rcv_srr(struct sk_buff *skb);
579579
*/
580580

581581
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
582-
void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset);
582+
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
583+
struct sk_buff *skb, int tlen, int offset);
583584
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
584585
struct ipcm_cookie *ipc, bool allow_ipv6);
585586
int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
@@ -601,7 +602,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
601602

602603
static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
603604
{
604-
ip_cmsg_recv_offset(msg, skb, 0, 0);
605+
ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0);
605606
}
606607

607608
bool icmp_global_allow(void);

include/net/udp.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,21 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
248248
/* net/ipv4/udp.c */
249249
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
250250
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
251+
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
252+
static inline struct sk_buff *
253+
__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked,
254+
int *off, int *err)
255+
{
256+
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
257+
udp_skb_destructor, peeked, off, err);
258+
}
259+
static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
260+
int noblock, int *err)
261+
{
262+
int peeked, off = 0;
263+
264+
return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
265+
}
251266

252267
void udp_v4_early_demux(struct sk_buff *skb);
253268
int udp_get_port(struct sock *sk, unsigned short snum,

net/core/datagram.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
165165
* __skb_try_recv_datagram - Receive a datagram skbuff
166166
* @sk: socket
167167
* @flags: MSG_ flags
168+
* @destructor: invoked under the receive lock on successful dequeue
168169
* @peeked: returns non-zero if this packet has been seen before
169170
* @off: an offset in bytes to peek skb from. Returns an offset
170171
* within an skb where data actually starts
@@ -197,6 +198,8 @@ static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
197198
* the standard around please.
198199
*/
199200
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
201+
void (*destructor)(struct sock *sk,
202+
struct sk_buff *skb),
200203
int *peeked, int *off, int *err,
201204
struct sk_buff **last)
202205
{
@@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
241244
}
242245

243246
atomic_inc(&skb->users);
244-
} else
247+
} else {
245248
__skb_unlink(skb, queue);
246-
249+
if (destructor)
250+
destructor(sk, skb);
251+
}
247252
spin_unlock_irqrestore(&queue->lock, cpu_flags);
248253
*off = _off;
249254
return skb;
@@ -262,6 +267,8 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
262267
EXPORT_SYMBOL(__skb_try_recv_datagram);
263268

264269
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
270+
void (*destructor)(struct sock *sk,
271+
struct sk_buff *skb),
265272
int *peeked, int *off, int *err)
266273
{
267274
struct sk_buff *skb, *last;
@@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
270277
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
271278

272279
do {
273-
skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
274-
&last);
280+
skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
281+
off, err, &last);
275282
if (skb)
276283
return skb;
277284

@@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
290297
int peeked, off = 0;
291298

292299
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
293-
&peeked, &off, err);
300+
NULL, &peeked, &off, err);
294301
}
295302
EXPORT_SYMBOL(skb_recv_datagram);
296303

net/ipv4/ip_sockglue.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
164164
put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
165165
}
166166

167-
void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
168-
int tlen, int offset)
167+
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
168+
struct sk_buff *skb, int tlen, int offset)
169169
{
170-
struct inet_sock *inet = inet_sk(skb->sk);
170+
struct inet_sock *inet = inet_sk(sk);
171171
unsigned int flags = inet->cmsg_flags;
172172

173173
/* Ordered by supposed usage frequency */

net/ipv4/udp.c

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,26 +1173,26 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
11731173
return ret;
11741174
}
11751175

1176+
/* fully reclaim rmem/fwd memory allocated for skb */
11761177
static void udp_rmem_release(struct sock *sk, int size, int partial)
11771178
{
11781179
int amt;
11791180

11801181
atomic_sub(size, &sk->sk_rmem_alloc);
1181-
1182-
spin_lock_bh(&sk->sk_receive_queue.lock);
11831182
sk->sk_forward_alloc += size;
11841183
amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
11851184
sk->sk_forward_alloc -= amt;
1186-
spin_unlock_bh(&sk->sk_receive_queue.lock);
11871185

11881186
if (amt)
11891187
__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
11901188
}
11911189

1192-
static void udp_rmem_free(struct sk_buff *skb)
1190+
/* Note: called with sk_receive_queue.lock held */
1191+
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
11931192
{
1194-
udp_rmem_release(skb->sk, skb->truesize, 1);
1193+
udp_rmem_release(sk, skb->truesize, 1);
11951194
}
1195+
EXPORT_SYMBOL(udp_skb_destructor);
11961196

11971197
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
11981198
{
@@ -1229,9 +1229,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
12291229

12301230
sk->sk_forward_alloc -= size;
12311231

1232-
/* the skb owner in now the udp socket */
1233-
skb->sk = sk;
1234-
skb->destructor = udp_rmem_free;
1232+
/* no need to setup a destructor, we will explicitly release the
1233+
* forward allocated memory on dequeue
1234+
*/
12351235
skb->dev = NULL;
12361236
sock_skb_set_dropcount(sk, skb);
12371237

@@ -1255,8 +1255,15 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
12551255
static void udp_destruct_sock(struct sock *sk)
12561256
{
12571257
/* reclaim completely the forward allocated memory */
1258-
__skb_queue_purge(&sk->sk_receive_queue);
1259-
udp_rmem_release(sk, 0, 0);
1258+
unsigned int total = 0;
1259+
struct sk_buff *skb;
1260+
1261+
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1262+
total += skb->truesize;
1263+
kfree_skb(skb);
1264+
}
1265+
udp_rmem_release(sk, total, 0);
1266+
12601267
inet_sock_destruct(sk);
12611268
}
12621269

@@ -1288,12 +1295,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
12881295
*/
12891296
static int first_packet_length(struct sock *sk)
12901297
{
1291-
struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
1298+
struct sk_buff_head *rcvq = &sk->sk_receive_queue;
12921299
struct sk_buff *skb;
1300+
int total = 0;
12931301
int res;
12941302

1295-
__skb_queue_head_init(&list_kill);
1296-
12971303
spin_lock_bh(&rcvq->lock);
12981304
while ((skb = skb_peek(rcvq)) != NULL &&
12991305
udp_lib_checksum_complete(skb)) {
@@ -1303,12 +1309,13 @@ static int first_packet_length(struct sock *sk)
13031309
IS_UDPLITE(sk));
13041310
atomic_inc(&sk->sk_drops);
13051311
__skb_unlink(skb, rcvq);
1306-
__skb_queue_tail(&list_kill, skb);
1312+
total += skb->truesize;
1313+
kfree_skb(skb);
13071314
}
13081315
res = skb ? skb->len : -1;
1316+
if (total)
1317+
udp_rmem_release(sk, total, 1);
13091318
spin_unlock_bh(&rcvq->lock);
1310-
1311-
__skb_queue_purge(&list_kill);
13121319
return res;
13131320
}
13141321

@@ -1363,8 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
13631370

13641371
try_again:
13651372
peeking = off = sk_peek_offset(sk, flags);
1366-
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
1367-
&peeked, &off, &err);
1373+
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
13681374
if (!skb)
13691375
return err;
13701376

@@ -1421,7 +1427,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
14211427
*addr_len = sizeof(*sin);
14221428
}
14231429
if (inet->cmsg_flags)
1424-
ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off);
1430+
ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
14251431

14261432
err = copied;
14271433
if (flags & MSG_TRUNC)

net/ipv6/udp.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
343343

344344
try_again:
345345
peeking = off = sk_peek_offset(sk, flags);
346-
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
347-
&peeked, &off, &err);
346+
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
348347
if (!skb)
349348
return err;
350349

@@ -425,7 +424,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
425424

426425
if (is_udp4) {
427426
if (inet->cmsg_flags)
428-
ip_cmsg_recv_offset(msg, skb,
427+
ip_cmsg_recv_offset(msg, sk, skb,
429428
sizeof(struct udphdr), off);
430429
} else {
431430
if (np->rxopt.all)

net/rxrpc/input.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
10531053

10541054
ASSERT(!irqs_disabled());
10551055

1056-
skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
1056+
skb = skb_recv_udp(udp_sk, 0, 1, &ret);
10571057
if (!skb) {
10581058
if (ret == -EAGAIN)
10591059
return;
@@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
10751075

10761076
__UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
10771077

1078-
/* The socket buffer we have is owned by UDP, with UDP's data all over
1079-
* it, but we really want our own data there.
1078+
/* The UDP protocol already released all skb resources;
1079+
* we are free to add our own data there.
10801080
*/
1081-
skb_orphan(skb);
10821081
sp = rxrpc_skb(skb);
10831082

10841083
/* dig out the RxRPC connection details */

net/sunrpc/svcsock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
547547
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
548548
0, 0, MSG_PEEK | MSG_DONTWAIT);
549549
if (err >= 0)
550-
skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
550+
skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
551551

552552
if (skb == NULL) {
553553
if (err != -EAGAIN) {

net/sunrpc/xprtsock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
10801080
if (sk == NULL)
10811081
goto out;
10821082
for (;;) {
1083-
skb = skb_recv_datagram(sk, 0, 1, &err);
1083+
skb = skb_recv_udp(sk, 0, 1, &err);
10841084
if (skb != NULL) {
10851085
xs_udp_data_read_skb(&transport->xprt, sk, skb);
10861086
consume_skb(skb);

net/unix/af_unix.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
21132113
mutex_lock(&u->iolock);
21142114

21152115
skip = sk_peek_offset(sk, flags);
2116-
skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2117-
&last);
2116+
skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2117+
&err, &last);
21182118
if (skb)
21192119
break;
21202120

0 commit comments

Comments
 (0)