Skip to content

Commit b65ac44

Browse files
Paolo Abeni authored and davem330 committed
udp: try to avoid 2 cache miss on dequeue
when udp_recvmsg() is executed, on x86_64 and other archs, most skb fields are on cold cachelines. If the skb is linear and the kernel doesn't need to compute the udp csum, only a handful of skb fields are required by udp_recvmsg(). Since we already use skb->dev_scratch to cache hot data, and there are 32 bits unused on 64 bit archs, use such field to cache as much data as we can, and try to prefetch on dequeue the relevant fields that are left out. This can save up to 2 cache misses per packet. v1 -> v2: - changed udp_dev_scratch fields types to u{32,16} variant, replaced bitfield with bool Signed-off-by: Paolo Abeni <[email protected]> Acked-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 0a463c7 commit b65ac44

File tree

1 file changed

+103
-11
lines changed

1 file changed

+103
-11
lines changed

net/ipv4/udp.c

Lines changed: 103 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1163,6 +1163,83 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
11631163
return ret;
11641164
}
11651165

1166+
/* Copy as much information as possible into skb->dev_scratch to avoid
1167+
* possibly multiple cache miss on dequeue();
1168+
*/
1169+
#if BITS_PER_LONG == 64
1170+
1171+
/* we can store multiple info here: truesize, len and the bit needed to
1172+
* compute skb_csum_unnecessary will be on cold cache lines at recvmsg
1173+
* time.
1174+
* skb->len can be stored on 16 bits since the udp header has been already
1175+
* validated and pulled.
1176+
*/
1177+
struct udp_dev_scratch {
1178+
u32 truesize;
1179+
u16 len;
1180+
bool is_linear;
1181+
bool csum_unnecessary;
1182+
};
1183+
1184+
static void udp_set_dev_scratch(struct sk_buff *skb)
1185+
{
1186+
struct udp_dev_scratch *scratch;
1187+
1188+
BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
1189+
scratch = (struct udp_dev_scratch *)&skb->dev_scratch;
1190+
scratch->truesize = skb->truesize;
1191+
scratch->len = skb->len;
1192+
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
1193+
scratch->is_linear = !skb_is_nonlinear(skb);
1194+
}
1195+
1196+
static int udp_skb_truesize(struct sk_buff *skb)
1197+
{
1198+
return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
1199+
}
1200+
1201+
static unsigned int udp_skb_len(struct sk_buff *skb)
1202+
{
1203+
return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
1204+
}
1205+
1206+
static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
1207+
{
1208+
return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
1209+
}
1210+
1211+
static bool udp_skb_is_linear(struct sk_buff *skb)
1212+
{
1213+
return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
1214+
}
1215+
1216+
#else
1217+
static void udp_set_dev_scratch(struct sk_buff *skb)
1218+
{
1219+
skb->dev_scratch = skb->truesize;
1220+
}
1221+
1222+
static int udp_skb_truesize(struct sk_buff *skb)
1223+
{
1224+
return skb->dev_scratch;
1225+
}
1226+
1227+
static unsigned int udp_skb_len(struct sk_buff *skb)
1228+
{
1229+
return skb->len;
1230+
}
1231+
1232+
static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
1233+
{
1234+
return skb_csum_unnecessary(skb);
1235+
}
1236+
1237+
static bool udp_skb_is_linear(struct sk_buff *skb)
1238+
{
1239+
return !skb_is_nonlinear(skb);
1240+
}
1241+
#endif
1242+
11661243
/* fully reclaim rmem/fwd memory allocated for skb */
11671244
static void udp_rmem_release(struct sock *sk, int size, int partial,
11681245
bool rx_queue_lock_held)
@@ -1213,14 +1290,16 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
12131290
*/
12141291
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
12151292
{
1216-
udp_rmem_release(sk, skb->dev_scratch, 1, false);
1293+
prefetch(&skb->data);
1294+
udp_rmem_release(sk, udp_skb_truesize(skb), 1, false);
12171295
}
12181296
EXPORT_SYMBOL(udp_skb_destructor);
12191297

12201298
/* as above, but the caller held the rx queue lock, too */
12211299
static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
12221300
{
1223-
udp_rmem_release(sk, skb->dev_scratch, 1, true);
1301+
prefetch(&skb->data);
1302+
udp_rmem_release(sk, udp_skb_truesize(skb), 1, true);
12241303
}
12251304

12261305
/* Idea of busylocks is to let producers grab an extra spinlock
@@ -1274,10 +1353,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
12741353
busy = busylock_acquire(sk);
12751354
}
12761355
size = skb->truesize;
1277-
/* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss
1278-
* in udp_skb_destructor()
1279-
*/
1280-
skb->dev_scratch = size;
1356+
udp_set_dev_scratch(skb);
12811357

12821358
/* we drop only if the receive buf is full and the receive
12831359
* queue contains some other skb
@@ -1515,6 +1591,18 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
15151591
}
15161592
EXPORT_SYMBOL_GPL(__skb_recv_udp);
15171593

1594+
static int copy_linear_skb(struct sk_buff *skb, int len, int off,
1595+
struct iov_iter *to)
1596+
{
1597+
int n, copy = len - off;
1598+
1599+
n = copy_to_iter(skb->data + off, copy, to);
1600+
if (n == copy)
1601+
return 0;
1602+
1603+
return -EFAULT;
1604+
}
1605+
15181606
/*
15191607
* This should be easy, if there is something there we
15201608
* return it, otherwise we block.
@@ -1541,7 +1629,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
15411629
if (!skb)
15421630
return err;
15431631

1544-
ulen = skb->len;
1632+
ulen = udp_skb_len(skb);
15451633
copied = len;
15461634
if (copied > ulen - off)
15471635
copied = ulen - off;
@@ -1556,14 +1644,18 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
15561644

15571645
if (copied < ulen || peeking ||
15581646
(is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
1559-
checksum_valid = !udp_lib_checksum_complete(skb);
1647+
checksum_valid = udp_skb_csum_unnecessary(skb) ||
1648+
!__udp_lib_checksum_complete(skb);
15601649
if (!checksum_valid)
15611650
goto csum_copy_err;
15621651
}
15631652

1564-
if (checksum_valid || skb_csum_unnecessary(skb))
1565-
err = skb_copy_datagram_msg(skb, off, msg, copied);
1566-
else {
1653+
if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
1654+
if (udp_skb_is_linear(skb))
1655+
err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
1656+
else
1657+
err = skb_copy_datagram_msg(skb, off, msg, copied);
1658+
} else {
15671659
err = skb_copy_and_csum_datagram_msg(skb, off, msg);
15681660

15691661
if (err == -EINVAL)

0 commit comments

Comments
 (0)