
Commit e3e37e7

Merge branch 'vhost_net-batching'
Jason Wang says:

====================
vhost_net tx batching

This series implements tx batching support for vhost. This is done by
using MSG_MORE as a hint for the underlying socket. The backend (e.g.
tap) can then batch the packets temporarily in a list and submit them
all once the number of batched packets exceeds a limit.

Tests show an obvious improvement for guest pktgen over mlx4 (noqueue)
on the host:

                              Mpps   -+%
rx-frames = 0                 0.91   +0%
rx-frames = 4                 1.00   +9.8%
rx-frames = 8                 1.00   +9.8%
rx-frames = 16                1.01   +10.9%
rx-frames = 32                1.07   +17.5%
rx-frames = 48                1.07   +17.5%
rx-frames = 64                1.08   +18.6%
rx-frames = 64 (no MSG_MORE)  0.91   +0%

Changes from V4:
- stick to NAPI_POLL_WEIGHT for rx-frames if the user specifies a
  greater value

Changes from V3:
- use ethtool instead of a module parameter to control the maximum
  number of batched packets
- avoid overhead when MSG_MORE is not set and no packet is queued

Changes from V2:
- remove the useless queue limitation check (and we don't drop any
  packet now)

Changes from V1:
- drop the NAPI handler since we don't use NAPI now
- fix an issue that could exceed the max pending count of zerocopy
- more improvement on available buffer detection
- move the limit on batched packets from vhost to tuntap
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 1a8b6d7 + 5503fce
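As the V3 changelog notes, the batch size is controlled through ethtool's standard coalescing interface rather than a module parameter; from the command line that is "ethtool -C tap0 rx-frames 64". Below is a minimal userspace sketch of the same operation via the SIOCETHTOOL ioctl, the knob that tun_set_coalesce() below services. The device name "tap0" and the value 64 are illustrative assumptions, not part of this commit.

/* Sketch: set the tun/tap rx-frames coalescing parameter from userspace.
 * Builds against the kernel UAPI headers; error handling kept minimal. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/sockios.h>

int main(void)
{
        struct ethtool_coalesce ec = { .cmd = ETHTOOL_SCOALESCE };
        struct ifreq ifr;
        int fd;

        /* Any socket works as a handle for the ethtool ioctl. */
        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0) {
                perror("socket");
                return 1;
        }

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "tap0", IFNAMSIZ - 1);  /* assumed device */

        /* A robust client would issue ETHTOOL_GCOALESCE first and change
         * only this field; tun ignores the rest, so we skip that here.
         * Values above NAPI_POLL_WEIGHT (64) are clamped by the driver. */
        ec.rx_max_coalesced_frames = 64;
        ifr.ifr_data = (char *)&ec;

        if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
                perror("SIOCETHTOOL");

        close(fd);
        return 0;
}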

3 files changed: +96, -11 lines

drivers/net/tun.c

Lines changed: 70 additions & 6 deletions
@@ -218,6 +218,7 @@ struct tun_struct {
         struct list_head disabled;
         void *security;
         u32 flow_count;
+        u32 rx_batched;
         struct tun_pcpu_stats __percpu *pcpu_stats;
 };
 
@@ -522,6 +523,7 @@ static void tun_queue_purge(struct tun_file *tfile)
         while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
                 kfree_skb(skb);
 
+        skb_queue_purge(&tfile->sk.sk_write_queue);
         skb_queue_purge(&tfile->sk.sk_error_queue);
 }
 
@@ -1139,10 +1141,46 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
         return skb;
 }
 
+static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
+                           struct sk_buff *skb, int more)
+{
+        struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+        struct sk_buff_head process_queue;
+        u32 rx_batched = tun->rx_batched;
+        bool rcv = false;
+
+        if (!rx_batched || (!more && skb_queue_empty(queue))) {
+                local_bh_disable();
+                netif_receive_skb(skb);
+                local_bh_enable();
+                return;
+        }
+
+        spin_lock(&queue->lock);
+        if (!more || skb_queue_len(queue) == rx_batched) {
+                __skb_queue_head_init(&process_queue);
+                skb_queue_splice_tail_init(queue, &process_queue);
+                rcv = true;
+        } else {
+                __skb_queue_tail(queue, skb);
+        }
+        spin_unlock(&queue->lock);
+
+        if (rcv) {
+                struct sk_buff *nskb;
+
+                local_bh_disable();
+                while ((nskb = __skb_dequeue(&process_queue)))
+                        netif_receive_skb(nskb);
+                netif_receive_skb(skb);
+                local_bh_enable();
+        }
+}
+
 /* Get packet from user space buffer */
 static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
                             void *msg_control, struct iov_iter *from,
-                            int noblock)
+                            int noblock, bool more)
 {
         struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
         struct sk_buff *skb;
@@ -1283,9 +1321,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
         rxhash = skb_get_hash(skb);
 #ifndef CONFIG_4KSTACKS
-        local_bh_disable();
-        netif_receive_skb(skb);
-        local_bh_enable();
+        tun_rx_batched(tun, tfile, skb, more);
 #else
         netif_rx_ni(skb);
 #endif
@@ -1311,7 +1347,8 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
         if (!tun)
                 return -EBADFD;
 
-        result = tun_get_user(tun, tfile, NULL, from, file->f_flags & O_NONBLOCK);
+        result = tun_get_user(tun, tfile, NULL, from,
+                              file->f_flags & O_NONBLOCK, false);
 
         tun_put(tun);
         return result;
@@ -1569,7 +1606,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
                 return -EBADFD;
 
         ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
-                           m->msg_flags & MSG_DONTWAIT);
+                           m->msg_flags & MSG_DONTWAIT,
+                           m->msg_flags & MSG_MORE);
         tun_put(tun);
         return ret;
 }
@@ -1770,6 +1808,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                 tun->align = NET_SKB_PAD;
                 tun->filter_attached = false;
                 tun->sndbuf = tfile->socket.sk->sk_sndbuf;
+                tun->rx_batched = 0;
 
                 tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
                 if (!tun->pcpu_stats) {
@@ -2438,13 +2477,38 @@ static void tun_set_msglevel(struct net_device *dev, u32 value)
 #endif
 }
 
+static int tun_get_coalesce(struct net_device *dev,
+                            struct ethtool_coalesce *ec)
+{
+        struct tun_struct *tun = netdev_priv(dev);
+
+        ec->rx_max_coalesced_frames = tun->rx_batched;
+
+        return 0;
+}
+
+static int tun_set_coalesce(struct net_device *dev,
+                            struct ethtool_coalesce *ec)
+{
+        struct tun_struct *tun = netdev_priv(dev);
+
+        if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT)
+                tun->rx_batched = NAPI_POLL_WEIGHT;
+        else
+                tun->rx_batched = ec->rx_max_coalesced_frames;
+
+        return 0;
+}
+
 static const struct ethtool_ops tun_ethtool_ops = {
         .get_settings   = tun_get_settings,
         .get_drvinfo    = tun_get_drvinfo,
         .get_msglevel   = tun_get_msglevel,
         .set_msglevel   = tun_set_msglevel,
         .get_link       = ethtool_op_get_link,
         .get_ts_info    = ethtool_op_get_ts_info,
+        .get_coalesce   = tun_get_coalesce,
+        .set_coalesce   = tun_set_coalesce,
 };
 
 static int tun_queue_resize(struct tun_struct *tun)
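
The heart of the tun.c change is tun_rx_batched() above. As a reading aid, here is a userspace model of its decision logic with integers standing in for sk_buffs; the names, the batch size of 4, and the packet values are illustrative assumptions, not kernel API. It exercises the three cases: deliver immediately when batching is off or there is no MSG_MORE hint and nothing is pending, flush when the hint ends or the batch fills, and otherwise keep queueing.

/* Userspace model of tun_rx_batched(); locking omitted. */
#include <stdbool.h>
#include <stdio.h>

#define RX_BATCHED 4                    /* models tun->rx_batched */

static int queue[RX_BATCHED];           /* models sk->sk_write_queue */
static int qlen;

static void deliver(int pkt)
{
        printf("deliver %d\n", pkt);    /* models netif_receive_skb() */
}

static void rx_batched(int pkt, bool more)
{
        int i;

        /* Case 1: batching disabled, or no hint and nothing pending. */
        if (!RX_BATCHED || (!more && qlen == 0)) {
                deliver(pkt);
                return;
        }

        /* Case 2: the hint ended or the batch is full - flush the queue,
         * then deliver the current packet. */
        if (!more || qlen == RX_BATCHED) {
                for (i = 0; i < qlen; i++)
                        deliver(queue[i]);
                qlen = 0;
                deliver(pkt);
                return;
        }

        /* Case 3: keep batching. */
        queue[qlen++] = pkt;
}

int main(void)
{
        /* The sender hints MSG_MORE on all but the last packet. */
        rx_batched(1, true);
        rx_batched(2, true);
        rx_batched(3, false);           /* flushes 1 and 2, then delivers 3 */
        return 0;
}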

drivers/vhost/net.c

Lines changed: 20 additions & 3 deletions
@@ -351,6 +351,15 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
         return r;
 }
 
+static bool vhost_exceeds_maxpend(struct vhost_net *net)
+{
+        struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+        struct vhost_virtqueue *vq = &nvq->vq;
+
+        return (nvq->upend_idx + vq->num - VHOST_MAX_PEND) % UIO_MAXIOV
+                == nvq->done_idx;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
@@ -394,8 +403,7 @@ static void handle_tx(struct vhost_net *net)
                 /* If more outstanding DMAs, queue the work.
                  * Handle upend_idx wrap around
                  */
-                if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND)
-                             % UIO_MAXIOV == nvq->done_idx))
+                if (unlikely(vhost_exceeds_maxpend(net)))
                         break;
 
                 head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
@@ -454,6 +462,16 @@ static void handle_tx(struct vhost_net *net)
                         msg.msg_control = NULL;
                         ubufs = NULL;
                 }
+
+                total_len += len;
+                if (total_len < VHOST_NET_WEIGHT &&
+                    !vhost_vq_avail_empty(&net->dev, vq) &&
+                    likely(!vhost_exceeds_maxpend(net))) {
+                        msg.msg_flags |= MSG_MORE;
+                } else {
+                        msg.msg_flags &= ~MSG_MORE;
+                }
+
                 /* TODO: Check specific error and bomb out unless ENOBUFS? */
                 err = sock->ops->sendmsg(sock, &msg, len);
                 if (unlikely(err < 0)) {
@@ -472,7 +490,6 @@ static void handle_tx(struct vhost_net *net)
                         vhost_add_used_and_signal(&net->dev, vq, head, 0);
                 else
                         vhost_zerocopy_signal_used(net, vq);
-                total_len += len;
                 vhost_net_tx_packet(net);
                 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
                         vhost_poll_queue(&vq->poll);
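
The new MSG_MORE logic in handle_tx() reduces to a pure predicate over three conditions: the VHOST_NET_WEIGHT byte budget (0x80000 in drivers/vhost/net.c) for this poll round is not yet spent, the guest has more buffers queued, and zerocopy completions are not backed up. A minimal sketch, with assumed inputs in place of the real vhost state:

/* The MSG_MORE decision modeled as a pure function. */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define VHOST_NET_WEIGHT 0x80000        /* matches drivers/vhost/net.c */

static bool want_msg_more(size_t total_len, bool avail_empty,
                          bool exceeds_maxpend)
{
        /* Hint more packets only while we have budget this round, the
         * guest has already queued more buffers, and zerocopy
         * completions are keeping up. */
        return total_len < VHOST_NET_WEIGHT && !avail_empty &&
               !exceeds_maxpend;
}

int main(void)
{
        assert(want_msg_more(1500, false, false));   /* keep batching */
        assert(!want_msg_more(1500, true, false));   /* last buffer: flush */
        assert(!want_msg_more(VHOST_NET_WEIGHT, false, false)); /* budget spent */
        return 0;
}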

drivers/vhost/vhost.c

Lines changed: 6 additions & 2 deletions
@@ -2241,11 +2241,15 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
         __virtio16 avail_idx;
         int r;
 
+        if (vq->avail_idx != vq->last_avail_idx)
+                return false;
+
         r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
-        if (r)
+        if (unlikely(r))
                 return false;
+        vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
-        return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx;
+        return vq->avail_idx == vq->last_avail_idx;
 }
 EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
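
The vhost.c change caches the value read from the guest's avail index, so the per-packet emptiness check that the MSG_MORE decision now performs can skip the guest-memory access whenever the cache already proves buffers are pending. A minimal userspace model, with plain variables standing in for guest memory and the error path omitted (all names here are illustrative):

/* Model of the patched vhost_vq_avail_empty() caching behaviour. */
#include <stdbool.h>
#include <stdio.h>

static unsigned short guest_avail_idx;  /* models vq->avail->idx in guest RAM */
static unsigned short avail_idx;        /* models vq->avail_idx (cached copy) */
static unsigned short last_avail_idx;   /* models vq->last_avail_idx (consumed) */
static int guest_reads;

static bool vq_avail_empty(void)
{
        /* Fast path: the cache already proves buffers are available. */
        if (avail_idx != last_avail_idx)
                return false;

        /* Slow path: refresh the cache from "guest memory". */
        guest_reads++;
        avail_idx = guest_avail_idx;

        return avail_idx == last_avail_idx;
}

int main(void)
{
        guest_avail_idx = 2;            /* the guest queued two buffers */

        vq_avail_empty();               /* slow path: one guest read */
        vq_avail_empty();               /* fast path: served from the cache */

        printf("guest reads: %d\n", guest_reads);    /* prints 1 */
        return 0;
}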
