Skip to content

Commit bec1f6f

Browse files
wdebruij and davem330
authored and committed
udp: generate gso with UDP_SEGMENT
Support generic segmentation offload for udp datagrams. Callers can concatenate and send at once the payload of multiple datagrams with the same destination. To set segment size, the caller sets socket option UDP_SEGMENT to the length of each discrete payload. This value must be smaller than or equal to the relevant MTU. A follow-up patch adds cmsg UDP_SEGMENT to specify segment size on a per send call basis. Total byte length may then exceed MTU. If not an exact multiple of segment size, the last segment will be shorter. The implementation adds a gso_size field to the udp socket, ip(v6) cmsg cookie and inet_cork structure to be able to set the value at setsockopt or cmsg time and to work with both lockless and corked paths. Initial benchmark numbers show UDP GSO about as expensive as TCP GSO. tcp tso 3197 MB/s 54232 msg/s 54232 calls/s 6,457,754,262 cycles tcp gso 1765 MB/s 29939 msg/s 29939 calls/s 11,203,021,806 cycles tcp without tso/gso * 739 MB/s 12548 msg/s 12548 calls/s 11,205,483,630 cycles udp 876 MB/s 14873 msg/s 624666 calls/s 11,205,777,429 cycles udp gso 2139 MB/s 36282 msg/s 36282 calls/s 11,204,374,561 cycles [*] after reverting commit 0a6b2a1 ("tcp: switch to GSO being always on") Measured total system cycles ('-a') for one core while pinning both the network receive path and benchmark process to that core: perf stat -a -C 12 -e cycles \ ./udpgso_bench_tx -C 12 -4 -D "$DST" -l 4 Note the reduction in calls/s with GSO. Bytes per syscall increases from 1470 to 61818. Signed-off-by: Willem de Bruijn <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent ee80d1e commit bec1f6f

File tree

9 files changed

+67
-11
lines changed

9 files changed

+67
-11
lines changed

include/linux/udp.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ struct udp_sock {
5555
* when the socket is uncorked.
5656
*/
5757
__u16 len; /* total length of pending frames */
58+
__u16 gso_size;
5859
/*
5960
* Fields specific to UDP-Lite.
6061
*/
@@ -87,6 +88,8 @@ struct udp_sock {
8788
int forward_deficit;
8889
};
8990

91+
/*
 * Maximum number of gso_size-sized segments allowed in a single UDP GSO
 * send: udp_send_skb() and udp_v6_send_skb() return -EINVAL when
 * skb->len exceeds gso_size * UDP_MAX_SEGMENTS.
 *
 * The value is a plain int (64). A type suffix on the shift count does not
 * change the type of the result, so none is used here.
 */
#define UDP_MAX_SEGMENTS (1 << 6)
92+
9093
static inline struct udp_sock *udp_sk(const struct sock *sk)
9194
{
9295
return (struct udp_sock *)sk;

include/net/inet_sock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ struct inet_cork {
147147
__u8 ttl;
148148
__s16 tos;
149149
char priority;
150+
__u16 gso_size;
150151
};
151152

152153
struct inet_cork_full {

include/net/ip.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ struct ipcm_cookie {
7676
__u8 ttl;
7777
__s16 tos;
7878
char priority;
79+
__u16 gso_size;
7980
};
8081

8182
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))

include/net/ipv6.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ struct ipcm6_cookie {
298298
__s16 tclass;
299299
__s8 dontfrag;
300300
struct ipv6_txoptions *opt;
301+
__u16 gso_size;
301302
};
302303

303304
static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)

include/uapi/linux/udp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ struct udphdr {
3232
#define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */
3333
#define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */
3434
#define UDP_NO_CHECK6_RX 102 /* Disable accepting checksum for UDP6 */
35+
#define UDP_SEGMENT 103 /* Set GSO segmentation size */
3536

3637
/* UDP encapsulation types */
3738
#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */

net/ipv4/ip_output.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -882,7 +882,8 @@ static int __ip_append_data(struct sock *sk,
882882
skb = skb_peek_tail(queue);
883883

884884
exthdrlen = !skb ? rt->dst.header_len : 0;
885-
mtu = cork->fragsize;
885+
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
886+
886887
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
887888
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
888889
tskey = sk->sk_tskey++;
@@ -906,7 +907,7 @@ static int __ip_append_data(struct sock *sk,
906907
if (transhdrlen &&
907908
length + fragheaderlen <= mtu &&
908909
rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
909-
!(flags & MSG_MORE) &&
910+
(!(flags & MSG_MORE) || cork->gso_size) &&
910911
!exthdrlen)
911912
csummode = CHECKSUM_PARTIAL;
912913

@@ -1135,6 +1136,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
11351136
*rtp = NULL;
11361137
cork->fragsize = ip_sk_use_pmtu(sk) ?
11371138
dst_mtu(&rt->dst) : rt->dst.dev->mtu;
1139+
1140+
cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0;
11381141
cork->dst = &rt->dst;
11391142
cork->length = 0;
11401143
cork->ttl = ipc->ttl;
@@ -1214,7 +1217,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
12141217
return -EOPNOTSUPP;
12151218

12161219
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1217-
mtu = cork->fragsize;
1220+
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
12181221

12191222
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
12201223
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

net/ipv4/udp.c

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
757757
}
758758
EXPORT_SYMBOL(udp_set_csum);
759759

760-
static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
760+
static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
761+
struct inet_cork *cork)
761762
{
762763
struct sock *sk = skb->sk;
763764
struct inet_sock *inet = inet_sk(sk);
@@ -777,6 +778,21 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
777778
uh->len = htons(len);
778779
uh->check = 0;
779780

781+
if (cork->gso_size) {
782+
const int hlen = skb_network_header_len(skb) +
783+
sizeof(struct udphdr);
784+
785+
if (hlen + cork->gso_size > cork->fragsize)
786+
return -EINVAL;
787+
if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
788+
return -EINVAL;
789+
if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
790+
return -EIO;
791+
792+
skb_shinfo(skb)->gso_size = cork->gso_size;
793+
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
794+
}
795+
780796
if (is_udplite) /* UDP-Lite */
781797
csum = udplite_csum(skb);
782798

@@ -828,7 +844,7 @@ int udp_push_pending_frames(struct sock *sk)
828844
if (!skb)
829845
goto out;
830846

831-
err = udp_send_skb(skb, fl4);
847+
err = udp_send_skb(skb, fl4, &inet->cork.base);
832848

833849
out:
834850
up->len = 0;
@@ -922,6 +938,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
922938
ipc.sockc.tsflags = sk->sk_tsflags;
923939
ipc.addr = inet->inet_saddr;
924940
ipc.oif = sk->sk_bound_dev_if;
941+
ipc.gso_size = up->gso_size;
925942

926943
if (msg->msg_controllen) {
927944
err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6);
@@ -1037,7 +1054,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
10371054
&cork, msg->msg_flags);
10381055
err = PTR_ERR(skb);
10391056
if (!IS_ERR_OR_NULL(skb))
1040-
err = udp_send_skb(skb, fl4);
1057+
err = udp_send_skb(skb, fl4, &cork);
10411058
goto out;
10421059
}
10431060

@@ -2367,6 +2384,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
23672384
up->no_check6_rx = valbool;
23682385
break;
23692386

2387+
case UDP_SEGMENT:
2388+
if (val < 0 || val > USHRT_MAX)
2389+
return -EINVAL;
2390+
up->gso_size = val;
2391+
break;
2392+
23702393
/*
23712394
* UDP-Lite's partial checksum coverage (RFC 3828).
23722395
*/
@@ -2457,6 +2480,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
24572480
val = up->no_check6_rx;
24582481
break;
24592482

2483+
case UDP_SEGMENT:
2484+
val = up->gso_size;
2485+
break;
2486+
24602487
/* The following two cannot be changed on UDP sockets, the return is
24612488
* always 0 (which corresponds to the full checksum coverage of UDP). */
24622489
case UDPLITE_SEND_CSCOV:

net/ipv6/ip6_output.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
12401240
if (mtu < IPV6_MIN_MTU)
12411241
return -EINVAL;
12421242
cork->base.fragsize = mtu;
1243+
cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0;
1244+
12431245
if (dst_allfrag(xfrm_dst_path(&rt->dst)))
12441246
cork->base.flags |= IPCORK_ALLFRAG;
12451247
cork->base.length = 0;
@@ -1281,7 +1283,7 @@ static int __ip6_append_data(struct sock *sk,
12811283
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
12821284
}
12831285

1284-
mtu = cork->fragsize;
1286+
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
12851287
orig_mtu = mtu;
12861288

12871289
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1329,7 +1331,7 @@ static int __ip6_append_data(struct sock *sk,
13291331
if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
13301332
headersize == sizeof(struct ipv6hdr) &&
13311333
length <= mtu - headersize &&
1332-
!(flags & MSG_MORE) &&
1334+
(!(flags & MSG_MORE) || cork->gso_size) &&
13331335
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
13341336
csummode = CHECKSUM_PARTIAL;
13351337

net/ipv6/udp.c

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,8 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
10231023
* Sending
10241024
*/
10251025

1026-
static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
1026+
static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
1027+
struct inet_cork *cork)
10271028
{
10281029
struct sock *sk = skb->sk;
10291030
struct udphdr *uh;
@@ -1042,6 +1043,21 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
10421043
uh->len = htons(len);
10431044
uh->check = 0;
10441045

1046+
if (cork->gso_size) {
1047+
const int hlen = skb_network_header_len(skb) +
1048+
sizeof(struct udphdr);
1049+
1050+
if (hlen + cork->gso_size > cork->fragsize)
1051+
return -EINVAL;
1052+
if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
1053+
return -EINVAL;
1054+
if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
1055+
return -EIO;
1056+
1057+
skb_shinfo(skb)->gso_size = cork->gso_size;
1058+
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
1059+
}
1060+
10451061
if (is_udplite)
10461062
csum = udplite_csum(skb);
10471063
else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
@@ -1093,7 +1109,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
10931109
if (!skb)
10941110
goto out;
10951111

1096-
err = udp_v6_send_skb(skb, &fl6);
1112+
err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base);
10971113

10981114
out:
10991115
up->len = 0;
@@ -1127,6 +1143,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
11271143
ipc6.hlimit = -1;
11281144
ipc6.tclass = -1;
11291145
ipc6.dontfrag = -1;
1146+
ipc6.gso_size = up->gso_size;
11301147
sockc.tsflags = sk->sk_tsflags;
11311148

11321149
/* destination address check */
@@ -1333,7 +1350,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
13331350
msg->msg_flags, &cork, &sockc);
13341351
err = PTR_ERR(skb);
13351352
if (!IS_ERR_OR_NULL(skb))
1336-
err = udp_v6_send_skb(skb, &fl6);
1353+
err = udp_v6_send_skb(skb, &fl6, &cork.base);
13371354
goto out;
13381355
}
13391356

0 commit comments

Comments
 (0)