
Commit ab7db91

Michael Dalton authored and davem330 committed
virtio-net: auto-tune mergeable rx buffer size for improved performance
Commit 2613af0 ("virtio_net: migrate mergeable rx buffers to page frag allocators") changed the mergeable receive buffer size from PAGE_SIZE to MTU-size, introducing a single-stream regression for benchmarks with large average packet size. There is no single optimal buffer size for all workloads. For workloads with packet size <= MTU bytes, MTU + virtio-net header-sized buffers are preferred as larger buffers reduce the TCP window due to SKB truesize. However, single-stream workloads with large average packet sizes have higher throughput if larger (e.g., PAGE_SIZE) buffers are used.

This commit auto-tunes the mergeable receive buffer packet size by choosing the packet buffer size based on an EWMA of the recent packet sizes for the receive queue. Packet buffer sizes range from MTU_SIZE + virtio-net header len to PAGE_SIZE. This improves throughput for large packet workloads, as any workload with average packet size >= PAGE_SIZE will use PAGE_SIZE buffers.

These optimizations interact positively with recent commit ba27524 ("virtio-net: coalesce rx frags when possible during rx"), which coalesces adjacent RX SKB fragments in virtio_net. The coalescing optimizations benefit buffers of any size.

Benchmarks taken from an average of 5 netperf 30-second TCP_STREAM runs between two QEMU VMs on a single physical machine. Each VM has two VCPUs with all offloads & vhost enabled. All VMs and vhost threads run in a single 4 CPU cgroup cpuset, using cgroups to ensure that other processes in the system will not be scheduled on the benchmark CPUs. Trunk includes SKB rx frag coalescing.

net-next w/ virtio_net before 2613af0 (PAGE_SIZE bufs): 14642.85Gb/s
net-next (MTU-size bufs): 13170.01Gb/s
net-next + auto-tune: 14555.94Gb/s

Jason Wang also reported a throughput increase on mlx4 from 22Gb/s using MTU-sized buffers to about 26Gb/s using auto-tuning.

Signed-off-by: Michael Dalton <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
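To make the auto-tuning concrete, here is a minimal userspace C sketch (not part of the patch) of the buffer-size math described above: a packet-length EWMA with weight 64, clamped to the range [GOOD_PACKET_LEN, PAGE_SIZE - header length] and rounded up to the buffer alignment. The 12-byte mergeable header size and the 256-byte alignment are assumed values for a typical configuration, and the EWMA arithmetic only approximates lib/average.c.

```c
#include <stdio.h>

#define PAGE_SIZE		4096u
#define GOOD_PACKET_LEN		1518u	/* ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN */
#define HDR_LEN			12u	/* sizeof(struct virtio_net_hdr_mrg_rxbuf); assumed */
#define RECEIVE_AVG_WEIGHT	64u
#define MERGEABLE_BUFFER_ALIGN	256u	/* assumes L1_CACHE_BYTES <= 256 */

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define CLAMP(v, lo, hi) ((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))

static unsigned long ewma;	/* running average of received packet lengths */

/* Approximation of ewma_add() from lib/average.c with factor 1, weight 64. */
static void ewma_add_pkt(unsigned long len)
{
	ewma = ewma ? (ewma * (RECEIVE_AVG_WEIGHT - 1) + len) / RECEIVE_AVG_WEIGHT
		    : len;
}

/* Buffer length for the next RX refill, mirroring add_recvbuf_mergeable(). */
static unsigned int next_buf_len(void)
{
	unsigned long clamped = CLAMP(ewma, GOOD_PACKET_LEN, PAGE_SIZE - HDR_LEN);

	return ALIGN_UP(HDR_LEN + (unsigned int)clamped, MERGEABLE_BUFFER_ALIGN);
}

int main(void)
{
	int i;

	/* Small packets keep refill buffers near MTU size. */
	for (i = 0; i < 200; i++)
		ewma_add_pkt(200);
	printf("small packets: %u-byte buffers\n", next_buf_len());

	/* Large (GRO-sized) packets push the buffer size up to PAGE_SIZE. */
	for (i = 0; i < 200; i++)
		ewma_add_pkt(16384);
	printf("large packets: %u-byte buffers\n", next_buf_len());
	return 0;
}
```

With small packets the refill length settles at 1536 bytes (MTU-sized plus header, rounded up); once the average packet length exceeds a page, every refill uses full PAGE_SIZE buffers.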
1 parent fb51879 commit ab7db91

File tree

1 file changed: +75, -25 lines changed


drivers/net/virtio_net.c

Lines changed: 75 additions & 25 deletions
@@ -26,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/average.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -36,11 +37,18 @@ module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
-#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \
-				 sizeof(struct virtio_net_hdr_mrg_rxbuf), \
-				 L1_CACHE_BYTES))
 #define GOOD_COPY_LEN	128
 
+/* Weight used for the RX packet size EWMA. The average packet size is used to
+ * determine the packet buffer size when refilling RX rings. As the entire RX
+ * ring may be refilled at once, the weight is chosen so that the EWMA will be
+ * insensitive to short-term, transient changes in packet size.
+ */
+#define RECEIVE_AVG_WEIGHT 64
+
+/* Minimum alignment for mergeable packet buffers. */
+#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
+
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
@@ -75,6 +83,9 @@ struct receive_queue {
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
+	/* Average packet length for mergeable receive buffers. */
+	struct ewma mrg_avg_pkt_len;
+
 	/* Page frag for packet buffer allocation. */
 	struct page_frag alloc_frag;
 
@@ -216,6 +227,24 @@ static void skb_xmit_done(struct virtqueue *vq)
 	netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
+{
+	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
+	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
+}
+
+static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
+{
+	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
+
+}
+
+static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
+{
+	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
+	return (unsigned long)buf | (size - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
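The three helpers above pack a buffer's address and its truesize into a single unsigned long: buffers are MERGEABLE_BUFFER_ALIGN-aligned, so the low bits of the address are zero and can store (truesize / MERGEABLE_BUFFER_ALIGN) - 1, which fits because truesize never exceeds PAGE_SIZE. Below is a standalone round-trip sketch, assuming 256-byte alignment and 4 KiB pages (illustrative only, not kernel code):

```c
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define MERGEABLE_BUFFER_ALIGN	256u
#define PAGE_SIZE		4096u

/* Pack an aligned buffer address and its truesize into one word. */
static unsigned long buf_to_ctx(void *buf, unsigned int truesize)
{
	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
	return (unsigned long)buf | (size - 1);
}

/* Recover the buffer address by masking off the size bits. */
static void *ctx_to_address(unsigned long ctx)
{
	return (void *)(ctx & ~((unsigned long)MERGEABLE_BUFFER_ALIGN - 1));
}

/* Recover the truesize from the low bits. */
static unsigned int ctx_to_truesize(unsigned long ctx)
{
	return ((ctx & (MERGEABLE_BUFFER_ALIGN - 1)) + 1) * MERGEABLE_BUFFER_ALIGN;
}

int main(void)
{
	void *buf = aligned_alloc(MERGEABLE_BUFFER_ALIGN, PAGE_SIZE);
	unsigned int truesize = 1536;	/* e.g. an MTU-sized buffer, rounded up */
	unsigned long ctx;

	if (!buf)
		return 1;
	ctx = buf_to_ctx(buf, truesize);

	/* Round trip: both the address and the truesize come back intact. */
	assert(ctx_to_address(ctx) == buf);
	assert(ctx_to_truesize(ctx) == truesize);
	printf("ctx=%#lx -> addr=%p truesize=%u\n", ctx, ctx_to_address(ctx),
	       ctx_to_truesize(ctx));
	free(buf);
	return 0;
}
```

Packing both values into the single token handed to virtqueue_add_inbuf() is what lets each buffer carry its own auto-tuned truesize without a separate per-buffer metadata allocation.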
@@ -324,31 +353,33 @@ static struct sk_buff *receive_big(struct net_device *dev,
 
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct receive_queue *rq,
-					 void *buf,
+					 unsigned long ctx,
 					 unsigned int len)
 {
+	void *buf = mergeable_ctx_to_buf_address(ctx);
 	struct skb_vnet_hdr *hdr = buf;
 	int num_buf = hdr->mhdr.num_buffers;
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
-	unsigned int truesize = max_t(unsigned int, len, MERGE_BUFFER_LEN);
+	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+
 	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
 	struct sk_buff *curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
-
 	while (--num_buf) {
 		int num_skb_frags;
 
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
 				 dev->name, num_buf, hdr->mhdr.num_buffers);
 			dev->stats.rx_length_errors++;
 			goto err_buf;
 		}
 
+		buf = mergeable_ctx_to_buf_address(ctx);
 		page = virt_to_head_page(buf);
 
 		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
@@ -365,7 +396,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			head_skb->truesize += nskb->truesize;
 			num_skb_frags = 0;
 		}
-		truesize = max_t(unsigned int, len, MERGE_BUFFER_LEN);
+		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
 		if (curr_skb != head_skb) {
 			head_skb->data_len += len;
 			head_skb->len += len;
@@ -382,19 +413,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		}
 	}
 
+	ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
 err_skb:
 	put_page(page);
 	while (--num_buf) {
-		buf = virtqueue_get_buf(rq->vq, &len);
-		if (unlikely(!buf)) {
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 dev->name, num_buf);
 			dev->stats.rx_length_errors++;
 			break;
 		}
-		page = virt_to_head_page(buf);
+		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
 		put_page(page);
 	}
 err_buf:
@@ -414,17 +446,20 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
-		if (vi->mergeable_rx_bufs)
-			put_page(virt_to_head_page(buf));
-		else if (vi->big_packets)
+		if (vi->mergeable_rx_bufs) {
+			unsigned long ctx = (unsigned long)buf;
+			void *base = mergeable_ctx_to_buf_address(ctx);
+			put_page(virt_to_head_page(base));
+		} else if (vi->big_packets) {
 			give_pages(rq, buf);
-		else
+		} else {
 			dev_kfree_skb(buf);
+		}
 		return;
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, rq, buf, len);
+		skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
 	else if (vi->big_packets)
 		skb = receive_big(dev, rq, buf, len);
 	else
@@ -567,25 +602,36 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
+	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	char *buf;
+	unsigned long ctx;
 	int err;
 	unsigned int len, hole;
 
-	if (unlikely(!skb_page_frag_refill(MERGE_BUFFER_LEN, alloc_frag, gfp)))
+	len = hdr_len + clamp_t(unsigned int, ewma_read(&rq->mrg_avg_pkt_len),
+				GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+	len = ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
 		return -ENOMEM;
+
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	ctx = mergeable_buf_to_ctx(buf, len);
 	get_page(alloc_frag->page);
-	len = MERGE_BUFFER_LEN;
 	alloc_frag->offset += len;
 	hole = alloc_frag->size - alloc_frag->offset;
-	if (hole < MERGE_BUFFER_LEN) {
+	if (hole < len) {
+		/* To avoid internal fragmentation, if there is very likely not
+		 * enough space for another buffer, add the remaining space to
+		 * the current buffer. This extra space is not included in
+		 * the truesize stored in ctx.
+		 */
 		len += hole;
 		alloc_frag->offset += hole;
 	}
 
 	sg_init_one(rq->sg, buf, len);
-	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
 
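The comment added above describes the hole-absorption step: if the tail of the page frag is too small to hold another buffer of the chosen length, it is folded into the current buffer so the frag is fully consumed, while the truesize recorded in the ctx keeps the EWMA-chosen length. A toy calculation with illustrative sizes (not kernel code):

```c
#include <stdio.h>

int main(void)
{
	unsigned int frag_size = 4096;	/* page frag backing the RX buffers */
	unsigned int offset = 1536;	/* one 1536-byte buffer already handed out */
	unsigned int len = 1536;	/* length picked from the EWMA for this buffer */
	unsigned int hole;

	offset += len;
	hole = frag_size - offset;	/* 1024 bytes left in the frag */
	if (hole < len) {
		/* Not enough room for another buffer: fold the tail into this
		 * one. The truesize recorded in the ctx stays at 1536. */
		len += hole;
		offset += hole;
	}
	printf("buffer grows to %u bytes, frag fully used (%u/%u)\n",
	       len, offset, frag_size);
	return 0;
}
```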
@@ -1385,12 +1431,15 @@ static void free_unused_bufs(struct virtnet_info *vi)
 		struct virtqueue *vq = vi->rq[i].vq;
 
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (vi->mergeable_rx_bufs)
-				put_page(virt_to_head_page(buf));
-			else if (vi->big_packets)
+			if (vi->mergeable_rx_bufs) {
+				unsigned long ctx = (unsigned long)buf;
+				void *base = mergeable_ctx_to_buf_address(ctx);
+				put_page(virt_to_head_page(base));
+			} else if (vi->big_packets) {
 				give_pages(&vi->rq[i], buf);
-			else
+			} else {
 				dev_kfree_skb(buf);
+			}
 		}
 	}
 }
@@ -1498,6 +1547,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 			       napi_weight);
 
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
 		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
 	}
 
0 commit comments

Comments
 (0)