Skip to content

Commit 850cbad

Browse files
Paolo Abeni authored and davem330 committed
udp: use its own memory accounting schema
Completely avoid default sock memory accounting and replace it with udp-specific accounting.

Since the new memory accounting model encapsulates completely the required locking, remove the socket lock on both enqueue and dequeue, and avoid using the backlog on enqueue.

Be sure to clean up rx queue memory on socket destruction, using udp's own sk_destruct.

Tested using pktgen with random src port, 64 bytes packet, wire-speed on a 10G link as sender and udp_sink as the receiver, using an l4 tuple rxhash to stress the contention, and one or more udp_sink instances with reuseport.

nr readers   Kpps (vanilla)   Kpps (patched)
1            170              440
3            1250             2150
6            3000             3650
9            4200             4450
12           5700             6250

v4 -> v5:
- avoid unneeded test in first_packet_length
v3 -> v4:
- remove useless sk_rcvqueues_full() call
v2 -> v3:
- do not set the now unused backlog_rcv callback
v1 -> v2:
- add memory pressure support
- fixed dropwatch accounting for ipv6

Acked-by: Hannes Frederic Sowa <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
Acked-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent f970bd9 commit 850cbad

File tree

4 files changed

+33
-66
lines changed

4 files changed

+33
-66
lines changed

net/ipv4/udp.c

Lines changed: 8 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,13 +1307,7 @@ static int first_packet_length(struct sock *sk)
13071307
res = skb ? skb->len : -1;
13081308
spin_unlock_bh(&rcvq->lock);
13091309

1310-
if (!skb_queue_empty(&list_kill)) {
1311-
bool slow = lock_sock_fast(sk);
1312-
1313-
__skb_queue_purge(&list_kill);
1314-
sk_mem_reclaim_partial(sk);
1315-
unlock_sock_fast(sk, slow);
1316-
}
1310+
__skb_queue_purge(&list_kill);
13171311
return res;
13181312
}
13191313

@@ -1362,7 +1356,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
13621356
int err;
13631357
int is_udplite = IS_UDPLITE(sk);
13641358
bool checksum_valid = false;
1365-
bool slow;
13661359

13671360
if (flags & MSG_ERRQUEUE)
13681361
return ip_recv_error(sk, msg, len, addr_len);
@@ -1403,13 +1396,12 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
14031396
}
14041397

14051398
if (unlikely(err)) {
1406-
trace_kfree_skb(skb, udp_recvmsg);
14071399
if (!peeked) {
14081400
atomic_inc(&sk->sk_drops);
14091401
UDP_INC_STATS(sock_net(sk),
14101402
UDP_MIB_INERRORS, is_udplite);
14111403
}
1412-
skb_free_datagram_locked(sk, skb);
1404+
kfree_skb(skb);
14131405
return err;
14141406
}
14151407

@@ -1434,16 +1426,15 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
14341426
if (flags & MSG_TRUNC)
14351427
err = ulen;
14361428

1437-
__skb_free_datagram_locked(sk, skb, peeking ? -err : err);
1429+
skb_consume_udp(sk, skb, peeking ? -err : err);
14381430
return err;
14391431

14401432
csum_copy_err:
1441-
slow = lock_sock_fast(sk);
1442-
if (!skb_kill_datagram(sk, skb, flags)) {
1433+
if (!__sk_queue_drop_skb(sk, skb, flags)) {
14431434
UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
14441435
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
14451436
}
1446-
unlock_sock_fast(sk, slow);
1437+
kfree_skb(skb);
14471438

14481439
/* starting over for a new packet, but check if we need to yield */
14491440
cond_resched();
@@ -1562,7 +1553,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
15621553
sk_incoming_cpu_update(sk);
15631554
}
15641555

1565-
rc = __sock_queue_rcv_skb(sk, skb);
1556+
rc = __udp_enqueue_schedule_skb(sk, skb);
15661557
if (rc < 0) {
15671558
int is_udplite = IS_UDPLITE(sk);
15681559

@@ -1577,7 +1568,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
15771568
}
15781569

15791570
return 0;
1580-
15811571
}
15821572

15831573
static struct static_key udp_encap_needed __read_mostly;
@@ -1599,7 +1589,6 @@ EXPORT_SYMBOL(udp_encap_enable);
15991589
int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
16001590
{
16011591
struct udp_sock *up = udp_sk(sk);
1602-
int rc;
16031592
int is_udplite = IS_UDPLITE(sk);
16041593

16051594
/*
@@ -1686,25 +1675,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
16861675
goto drop;
16871676

16881677
udp_csum_pull_header(skb);
1689-
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
1690-
__UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1691-
is_udplite);
1692-
goto drop;
1693-
}
1694-
1695-
rc = 0;
16961678

16971679
ipv4_pktinfo_prepare(sk, skb);
1698-
bh_lock_sock(sk);
1699-
if (!sock_owned_by_user(sk))
1700-
rc = __udp_queue_rcv_skb(sk, skb);
1701-
else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
1702-
bh_unlock_sock(sk);
1703-
goto drop;
1704-
}
1705-
bh_unlock_sock(sk);
1706-
1707-
return rc;
1680+
return __udp_queue_rcv_skb(sk, skb);
17081681

17091682
csum_error:
17101683
__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -2314,13 +2287,13 @@ struct proto udp_prot = {
23142287
.connect = ip4_datagram_connect,
23152288
.disconnect = udp_disconnect,
23162289
.ioctl = udp_ioctl,
2290+
.init = udp_init_sock,
23172291
.destroy = udp_destroy_sock,
23182292
.setsockopt = udp_setsockopt,
23192293
.getsockopt = udp_getsockopt,
23202294
.sendmsg = udp_sendmsg,
23212295
.recvmsg = udp_recvmsg,
23222296
.sendpage = udp_sendpage,
2323-
.backlog_rcv = __udp_queue_rcv_skb,
23242297
.release_cb = ip4_datagram_release_cb,
23252298
.hash = udp_lib_hash,
23262299
.unhash = udp_lib_unhash,

net/ipv6/udp.c

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
334334
int is_udplite = IS_UDPLITE(sk);
335335
bool checksum_valid = false;
336336
int is_udp4;
337-
bool slow;
338337

339338
if (flags & MSG_ERRQUEUE)
340339
return ipv6_recv_error(sk, msg, len, addr_len);
@@ -378,7 +377,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
378377
goto csum_copy_err;
379378
}
380379
if (unlikely(err)) {
381-
trace_kfree_skb(skb, udpv6_recvmsg);
382380
if (!peeked) {
383381
atomic_inc(&sk->sk_drops);
384382
if (is_udp4)
@@ -388,7 +386,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
388386
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
389387
is_udplite);
390388
}
391-
skb_free_datagram_locked(sk, skb);
389+
kfree_skb(skb);
392390
return err;
393391
}
394392
if (!peeked) {
@@ -437,12 +435,11 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
437435
if (flags & MSG_TRUNC)
438436
err = ulen;
439437

440-
__skb_free_datagram_locked(sk, skb, peeking ? -err : err);
438+
skb_consume_udp(sk, skb, peeking ? -err : err);
441439
return err;
442440

443441
csum_copy_err:
444-
slow = lock_sock_fast(sk);
445-
if (!skb_kill_datagram(sk, skb, flags)) {
442+
if (!__sk_queue_drop_skb(sk, skb, flags)) {
446443
if (is_udp4) {
447444
UDP_INC_STATS(sock_net(sk),
448445
UDP_MIB_CSUMERRORS, is_udplite);
@@ -455,7 +452,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
455452
UDP_MIB_INERRORS, is_udplite);
456453
}
457454
}
458-
unlock_sock_fast(sk, slow);
455+
kfree_skb(skb);
459456

460457
/* starting over for a new packet, but check if we need to yield */
461458
cond_resched();
@@ -523,7 +520,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
523520
sk_incoming_cpu_update(sk);
524521
}
525522

526-
rc = __sock_queue_rcv_skb(sk, skb);
523+
rc = __udp_enqueue_schedule_skb(sk, skb);
527524
if (rc < 0) {
528525
int is_udplite = IS_UDPLITE(sk);
529526

@@ -535,6 +532,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
535532
kfree_skb(skb);
536533
return -1;
537534
}
535+
538536
return 0;
539537
}
540538

@@ -556,7 +554,6 @@ EXPORT_SYMBOL(udpv6_encap_enable);
556554
int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
557555
{
558556
struct udp_sock *up = udp_sk(sk);
559-
int rc;
560557
int is_udplite = IS_UDPLITE(sk);
561558

562559
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -622,25 +619,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
622619
goto drop;
623620

624621
udp_csum_pull_header(skb);
625-
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
626-
__UDP6_INC_STATS(sock_net(sk),
627-
UDP_MIB_RCVBUFERRORS, is_udplite);
628-
goto drop;
629-
}
630622

631623
skb_dst_drop(skb);
632624

633-
bh_lock_sock(sk);
634-
rc = 0;
635-
if (!sock_owned_by_user(sk))
636-
rc = __udpv6_queue_rcv_skb(sk, skb);
637-
else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
638-
bh_unlock_sock(sk);
639-
goto drop;
640-
}
641-
bh_unlock_sock(sk);
642-
643-
return rc;
625+
return __udpv6_queue_rcv_skb(sk, skb);
644626

645627
csum_error:
646628
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -1433,12 +1415,12 @@ struct proto udpv6_prot = {
14331415
.connect = ip6_datagram_connect,
14341416
.disconnect = udp_disconnect,
14351417
.ioctl = udp_ioctl,
1418+
.init = udp_init_sock,
14361419
.destroy = udpv6_destroy_sock,
14371420
.setsockopt = udpv6_setsockopt,
14381421
.getsockopt = udpv6_getsockopt,
14391422
.sendmsg = udpv6_sendmsg,
14401423
.recvmsg = udpv6_recvmsg,
1441-
.backlog_rcv = __udpv6_queue_rcv_skb,
14421424
.release_cb = ip6_datagram_release_cb,
14431425
.hash = udp_lib_hash,
14441426
.unhash = udp_lib_unhash,

net/sunrpc/svcsock.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <net/checksum.h>
4040
#include <net/ip.h>
4141
#include <net/ipv6.h>
42+
#include <net/udp.h>
4243
#include <net/tcp.h>
4344
#include <net/tcp_states.h>
4445
#include <asm/uaccess.h>
@@ -129,6 +130,18 @@ static void svc_release_skb(struct svc_rqst *rqstp)
129130
}
130131
}
131132

133+
static void svc_release_udp_skb(struct svc_rqst *rqstp)
134+
{
135+
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
136+
137+
if (skb) {
138+
rqstp->rq_xprt_ctxt = NULL;
139+
140+
dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
141+
consume_skb(skb);
142+
}
143+
}
144+
132145
union svc_pktinfo_u {
133146
struct in_pktinfo pkti;
134147
struct in6_pktinfo pkti6;
@@ -575,7 +588,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
575588
goto out_free;
576589
}
577590
local_bh_enable();
578-
skb_free_datagram_locked(svsk->sk_sk, skb);
591+
consume_skb(skb);
579592
} else {
580593
/* we can use it in-place */
581594
rqstp->rq_arg.head[0].iov_base = skb->data;
@@ -602,8 +615,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
602615

603616
return len;
604617
out_free:
605-
trace_kfree_skb(skb, svc_udp_recvfrom);
606-
skb_free_datagram_locked(svsk->sk_sk, skb);
618+
kfree_skb(skb);
607619
return 0;
608620
}
609621

@@ -660,7 +672,7 @@ static struct svc_xprt_ops svc_udp_ops = {
660672
.xpo_create = svc_udp_create,
661673
.xpo_recvfrom = svc_udp_recvfrom,
662674
.xpo_sendto = svc_udp_sendto,
663-
.xpo_release_rqst = svc_release_skb,
675+
.xpo_release_rqst = svc_release_udp_skb,
664676
.xpo_detach = svc_sock_detach,
665677
.xpo_free = svc_sock_free,
666678
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,

net/sunrpc/xprtsock.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1083,7 +1083,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
10831083
skb = skb_recv_datagram(sk, 0, 1, &err);
10841084
if (skb != NULL) {
10851085
xs_udp_data_read_skb(&transport->xprt, sk, skb);
1086-
skb_free_datagram_locked(sk, skb);
1086+
consume_skb(skb);
10871087
continue;
10881088
}
10891089
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))

0 commit comments

Comments
 (0)