Skip to content

Commit 5503fce

Browse files
jasowang authored and davem330 committed
tun: rx batching
We can only process 1 packet at one time during sendmsg(). This often lead bad cache utilization under heavy load. So this patch tries to do some batching during rx before submitting them to host network stack. This is done through accepting MSG_MORE as a hint from sendmsg() caller, if it was set, batch the packet temporarily in a linked list and submit them all once MSG_MORE were cleared. Tests were done by pktgen (burst=128) in guest over mlx4(noqueue) on host: Mpps -+% rx-frames = 0 0.91 +0% rx-frames = 4 1.00 +9.8% rx-frames = 8 1.00 +9.8% rx-frames = 16 1.01 +10.9% rx-frames = 32 1.07 +17.5% rx-frames = 48 1.07 +17.5% rx-frames = 64 1.08 +18.6% rx-frames = 64 (no MSG_MORE) 0.91 +0% User were allowed to change per device batched packets through ethtool -C rx-frames. NAPI_POLL_WEIGHT were used as upper limitation to prevent bh from being disabled too long. Signed-off-by: Jason Wang <[email protected]> Acked-by: Michael S. Tsirkin <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 0ed005c commit 5503fce

File tree

1 file changed

+70
-6
lines changed

1 file changed

+70
-6
lines changed

drivers/net/tun.c

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ struct tun_struct {
218218
struct list_head disabled;
219219
void *security;
220220
u32 flow_count;
221+
u32 rx_batched;
221222
struct tun_pcpu_stats __percpu *pcpu_stats;
222223
};
223224

@@ -522,6 +523,7 @@ static void tun_queue_purge(struct tun_file *tfile)
522523
while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
523524
kfree_skb(skb);
524525

526+
skb_queue_purge(&tfile->sk.sk_write_queue);
525527
skb_queue_purge(&tfile->sk.sk_error_queue);
526528
}
527529

@@ -1139,10 +1141,46 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
11391141
return skb;
11401142
}
11411143

1144+
static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
1145+
struct sk_buff *skb, int more)
1146+
{
1147+
struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
1148+
struct sk_buff_head process_queue;
1149+
u32 rx_batched = tun->rx_batched;
1150+
bool rcv = false;
1151+
1152+
if (!rx_batched || (!more && skb_queue_empty(queue))) {
1153+
local_bh_disable();
1154+
netif_receive_skb(skb);
1155+
local_bh_enable();
1156+
return;
1157+
}
1158+
1159+
spin_lock(&queue->lock);
1160+
if (!more || skb_queue_len(queue) == rx_batched) {
1161+
__skb_queue_head_init(&process_queue);
1162+
skb_queue_splice_tail_init(queue, &process_queue);
1163+
rcv = true;
1164+
} else {
1165+
__skb_queue_tail(queue, skb);
1166+
}
1167+
spin_unlock(&queue->lock);
1168+
1169+
if (rcv) {
1170+
struct sk_buff *nskb;
1171+
1172+
local_bh_disable();
1173+
while ((nskb = __skb_dequeue(&process_queue)))
1174+
netif_receive_skb(nskb);
1175+
netif_receive_skb(skb);
1176+
local_bh_enable();
1177+
}
1178+
}
1179+
11421180
/* Get packet from user space buffer */
11431181
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
11441182
void *msg_control, struct iov_iter *from,
1145-
int noblock)
1183+
int noblock, bool more)
11461184
{
11471185
struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
11481186
struct sk_buff *skb;
@@ -1283,9 +1321,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
12831321

12841322
rxhash = skb_get_hash(skb);
12851323
#ifndef CONFIG_4KSTACKS
1286-
local_bh_disable();
1287-
netif_receive_skb(skb);
1288-
local_bh_enable();
1324+
tun_rx_batched(tun, tfile, skb, more);
12891325
#else
12901326
netif_rx_ni(skb);
12911327
#endif
@@ -1311,7 +1347,8 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
13111347
if (!tun)
13121348
return -EBADFD;
13131349

1314-
result = tun_get_user(tun, tfile, NULL, from, file->f_flags & O_NONBLOCK);
1350+
result = tun_get_user(tun, tfile, NULL, from,
1351+
file->f_flags & O_NONBLOCK, false);
13151352

13161353
tun_put(tun);
13171354
return result;
@@ -1569,7 +1606,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
15691606
return -EBADFD;
15701607

15711608
ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
1572-
m->msg_flags & MSG_DONTWAIT);
1609+
m->msg_flags & MSG_DONTWAIT,
1610+
m->msg_flags & MSG_MORE);
15731611
tun_put(tun);
15741612
return ret;
15751613
}
@@ -1770,6 +1808,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
17701808
tun->align = NET_SKB_PAD;
17711809
tun->filter_attached = false;
17721810
tun->sndbuf = tfile->socket.sk->sk_sndbuf;
1811+
tun->rx_batched = 0;
17731812

17741813
tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
17751814
if (!tun->pcpu_stats) {
@@ -2438,13 +2477,38 @@ static void tun_set_msglevel(struct net_device *dev, u32 value)
24382477
#endif
24392478
}
24402479

2480+
static int tun_get_coalesce(struct net_device *dev,
2481+
struct ethtool_coalesce *ec)
2482+
{
2483+
struct tun_struct *tun = netdev_priv(dev);
2484+
2485+
ec->rx_max_coalesced_frames = tun->rx_batched;
2486+
2487+
return 0;
2488+
}
2489+
2490+
static int tun_set_coalesce(struct net_device *dev,
2491+
struct ethtool_coalesce *ec)
2492+
{
2493+
struct tun_struct *tun = netdev_priv(dev);
2494+
2495+
if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT)
2496+
tun->rx_batched = NAPI_POLL_WEIGHT;
2497+
else
2498+
tun->rx_batched = ec->rx_max_coalesced_frames;
2499+
2500+
return 0;
2501+
}
2502+
24412503
static const struct ethtool_ops tun_ethtool_ops = {
24422504
.get_settings = tun_get_settings,
24432505
.get_drvinfo = tun_get_drvinfo,
24442506
.get_msglevel = tun_get_msglevel,
24452507
.set_msglevel = tun_set_msglevel,
24462508
.get_link = ethtool_op_get_link,
24472509
.get_ts_info = ethtool_op_get_ts_info,
2510+
.get_coalesce = tun_get_coalesce,
2511+
.set_coalesce = tun_set_coalesce,
24482512
};
24492513

24502514
static int tun_queue_resize(struct tun_struct *tun)

0 commit comments

Comments
 (0)