Skip to content

Commit 362899b

Browse files
jasowang authored and davem330 committed
macvtap: switch to use skb array
This patch switches to using an skb array instead of sk_receive_queue to avoid spinlock contention. Tests show about a 21% improvement in guest rx pps: Before: 1472731 pkts/s After: 1786289 pkts/s Signed-off-by: Jason Wang <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 1b16bf4 commit 362899b

File tree

1 file changed

+71
-11
lines changed

1 file changed

+71
-11
lines changed

drivers/net/macvtap.c

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <net/rtnetlink.h>
2222
#include <net/sock.h>
2323
#include <linux/virtio_net.h>
24+
#include <linux/skb_array.h>
2425

2526
/*
2627
* A macvtap queue is the central object of this driver, it connects
@@ -43,6 +44,7 @@ struct macvtap_queue {
4344
u16 queue_index;
4445
bool enabled;
4546
struct list_head next;
47+
struct skb_array skb_array;
4648
};
4749

4850
#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
@@ -273,6 +275,7 @@ static void macvtap_put_queue(struct macvtap_queue *q)
273275
rtnl_unlock();
274276

275277
synchronize_rcu();
278+
skb_array_cleanup(&q->skb_array);
276279
sock_put(&q->sk);
277280
}
278281

@@ -366,7 +369,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
366369
if (!q)
367370
return RX_HANDLER_PASS;
368371

369-
if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
372+
if (__skb_array_full(&q->skb_array))
370373
goto drop;
371374

372375
skb_push(skb, ETH_HLEN);
@@ -384,7 +387,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
384387
goto drop;
385388

386389
if (!segs) {
387-
skb_queue_tail(&q->sk.sk_receive_queue, skb);
390+
if (skb_array_produce(&q->skb_array, skb))
391+
goto drop;
388392
goto wake_up;
389393
}
390394

@@ -393,7 +397,11 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
393397
struct sk_buff *nskb = segs->next;
394398

395399
segs->next = NULL;
396-
skb_queue_tail(&q->sk.sk_receive_queue, segs);
400+
if (skb_array_produce(&q->skb_array, segs)) {
401+
kfree_skb(segs);
402+
kfree_skb_list(nskb);
403+
break;
404+
}
397405
segs = nskb;
398406
}
399407
} else {
@@ -406,7 +414,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
406414
!(features & NETIF_F_CSUM_MASK) &&
407415
skb_checksum_help(skb))
408416
goto drop;
409-
skb_queue_tail(&q->sk.sk_receive_queue, skb);
417+
if (skb_array_produce(&q->skb_array, skb))
418+
goto drop;
410419
}
411420

412421
wake_up:
@@ -523,7 +532,11 @@ static void macvtap_sock_write_space(struct sock *sk)
523532

524533
static void macvtap_sock_destruct(struct sock *sk)
525534
{
526-
skb_queue_purge(&sk->sk_receive_queue);
535+
struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
536+
struct sk_buff *skb;
537+
538+
while ((skb = skb_array_consume(&q->skb_array)) != NULL)
539+
kfree(skb);
527540
}
528541

529542
static int macvtap_open(struct inode *inode, struct file *file)
@@ -536,13 +549,13 @@ static int macvtap_open(struct inode *inode, struct file *file)
536549
rtnl_lock();
537550
dev = dev_get_by_macvtap_minor(iminor(inode));
538551
if (!dev)
539-
goto out;
552+
goto err;
540553

541554
err = -ENOMEM;
542555
q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
543556
&macvtap_proto, 0);
544557
if (!q)
545-
goto out;
558+
goto err;
546559

547560
RCU_INIT_POINTER(q->sock.wq, &q->wq);
548561
init_waitqueue_head(&q->wq.wait);
@@ -566,11 +579,24 @@ static int macvtap_open(struct inode *inode, struct file *file)
566579
if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG))
567580
sock_set_flag(&q->sk, SOCK_ZEROCOPY);
568581

582+
err = -ENOMEM;
583+
if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL))
584+
goto err_array;
585+
569586
err = macvtap_set_queue(dev, file, q);
570587
if (err)
571-
sock_put(&q->sk);
588+
goto err_queue;
572589

573-
out:
590+
dev_put(dev);
591+
592+
rtnl_unlock();
593+
return err;
594+
595+
err_queue:
596+
skb_array_cleanup(&q->skb_array);
597+
err_array:
598+
sock_put(&q->sk);
599+
err:
574600
if (dev)
575601
dev_put(dev);
576602

@@ -596,7 +622,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
596622
mask = 0;
597623
poll_wait(file, &q->wq.wait, wait);
598624

599-
if (!skb_queue_empty(&q->sk.sk_receive_queue))
625+
if (!skb_array_empty(&q->skb_array))
600626
mask |= POLLIN | POLLRDNORM;
601627

602628
if (sock_writeable(&q->sk) ||
@@ -856,7 +882,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q,
856882
TASK_INTERRUPTIBLE);
857883

858884
/* Read frames from the queue */
859-
skb = skb_dequeue(&q->sk.sk_receive_queue);
885+
skb = skb_array_consume(&q->skb_array);
860886
if (skb)
861887
break;
862888
if (noblock) {
@@ -1180,10 +1206,18 @@ static int macvtap_recvmsg(struct socket *sock, struct msghdr *m,
11801206
return ret;
11811207
}
11821208

1209+
static int macvtap_peek_len(struct socket *sock)
1210+
{
1211+
struct macvtap_queue *q = container_of(sock, struct macvtap_queue,
1212+
sock);
1213+
return skb_array_peek_len(&q->skb_array);
1214+
}
1215+
11831216
/* Ops structure to mimic raw sockets with tun */
11841217
static const struct proto_ops macvtap_socket_ops = {
11851218
.sendmsg = macvtap_sendmsg,
11861219
.recvmsg = macvtap_recvmsg,
1220+
.peek_len = macvtap_peek_len,
11871221
};
11881222

11891223
/* Get an underlying socket object from tun file. Returns error unless file is
@@ -1202,6 +1236,28 @@ struct socket *macvtap_get_socket(struct file *file)
12021236
}
12031237
EXPORT_SYMBOL_GPL(macvtap_get_socket);
12041238

1239+
static int macvtap_queue_resize(struct macvlan_dev *vlan)
1240+
{
1241+
struct net_device *dev = vlan->dev;
1242+
struct macvtap_queue *q;
1243+
struct skb_array **arrays;
1244+
int n = vlan->numqueues;
1245+
int ret, i = 0;
1246+
1247+
arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
1248+
if (!arrays)
1249+
return -ENOMEM;
1250+
1251+
list_for_each_entry(q, &vlan->queue_list, next)
1252+
arrays[i++] = &q->skb_array;
1253+
1254+
ret = skb_array_resize_multiple(arrays, n,
1255+
dev->tx_queue_len, GFP_KERNEL);
1256+
1257+
kfree(arrays);
1258+
return ret;
1259+
}
1260+
12051261
static int macvtap_device_event(struct notifier_block *unused,
12061262
unsigned long event, void *ptr)
12071263
{
@@ -1249,6 +1305,10 @@ static int macvtap_device_event(struct notifier_block *unused,
12491305
device_destroy(&macvtap_class, devt);
12501306
macvtap_free_minor(vlan);
12511307
break;
1308+
case NETDEV_CHANGE_TX_QUEUE_LEN:
1309+
if (macvtap_queue_resize(vlan))
1310+
return NOTIFY_BAD;
1311+
break;
12521312
}
12531313

12541314
return NOTIFY_DONE;

0 commit comments

Comments
 (0)