Skip to content

Commit fc72d1d

Browse files
jasowang authored and davem330 committed
tuntap: XDP transmission
This patch implements XDP transmission for TAP. Since we can't create new queues for TAP during XDP set, exist ptr_ring was reused for queuing XDP buffers. To differ xdp_buff from sk_buff, TUN_XDP_FLAG (0x1UL) was encoded into lowest bit of xpd_buff pointer during ptr_ring_produce, and was decoded during consuming. XDP metadata was stored in the headroom of the packet which should work in most of cases since driver usually reserve enough headroom. Very minor changes were done for vhost_net: it just need to peek the length depends on the type of pointer. Tests were done on two Intel E5-2630 2.40GHz machines connected back to back through two 82599ES. Traffic were generated/received through MoonGen/testpmd(rxonly). It reports ~20% improvements when xdp_redirect_map is doing redirection from ixgbe to TAP (from 2.50Mpps to 3.05Mpps) Cc: Jesper Dangaard Brouer <[email protected]> Signed-off-by: Jason Wang <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 5990a30 commit fc72d1d

File tree

3 files changed

+208
-33
lines changed

3 files changed

+208
-33
lines changed

drivers/net/tun.c

Lines changed: 179 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,24 @@ struct tun_struct {
241241
struct tun_steering_prog __rcu *steering_prog;
242242
};
243243

244+
bool tun_is_xdp_buff(void *ptr)
245+
{
246+
return (unsigned long)ptr & TUN_XDP_FLAG;
247+
}
248+
EXPORT_SYMBOL(tun_is_xdp_buff);
249+
250+
void *tun_xdp_to_ptr(void *ptr)
251+
{
252+
return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
253+
}
254+
EXPORT_SYMBOL(tun_xdp_to_ptr);
255+
256+
void *tun_ptr_to_xdp(void *ptr)
257+
{
258+
return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
259+
}
260+
EXPORT_SYMBOL(tun_ptr_to_xdp);
261+
244262
static int tun_napi_receive(struct napi_struct *napi, int budget)
245263
{
246264
struct tun_file *tfile = container_of(napi, struct tun_file, napi);
@@ -631,12 +649,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
631649
return tun;
632650
}
633651

652+
static void tun_ptr_free(void *ptr)
653+
{
654+
if (!ptr)
655+
return;
656+
if (tun_is_xdp_buff(ptr)) {
657+
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
658+
659+
put_page(virt_to_head_page(xdp->data));
660+
} else {
661+
__skb_array_destroy_skb(ptr);
662+
}
663+
}
664+
634665
static void tun_queue_purge(struct tun_file *tfile)
635666
{
636-
struct sk_buff *skb;
667+
void *ptr;
637668

638-
while ((skb = ptr_ring_consume(&tfile->tx_ring)) != NULL)
639-
kfree_skb(skb);
669+
while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
670+
tun_ptr_free(ptr);
640671

641672
skb_queue_purge(&tfile->sk.sk_write_queue);
642673
skb_queue_purge(&tfile->sk.sk_error_queue);
@@ -689,8 +720,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
689720
unregister_netdevice(tun->dev);
690721
}
691722
if (tun) {
692-
ptr_ring_cleanup(&tfile->tx_ring,
693-
__skb_array_destroy_skb);
723+
ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
694724
xdp_rxq_info_unreg(&tfile->xdp_rxq);
695725
}
696726
sock_put(&tfile->sk);
@@ -1222,6 +1252,67 @@ static const struct net_device_ops tun_netdev_ops = {
12221252
.ndo_get_stats64 = tun_net_get_stats64,
12231253
};
12241254

1255+
/* ndo_xdp_xmit handler: queue one XDP buffer for a userspace reader.
 *
 * The xdp_buff metadata is copied into the packet's own headroom
 * (at data_hard_start), so the buffer carries its descriptor with it
 * through the ptr_ring.  Returns 0 on success or -ENOSPC when there is
 * no headroom, no queue, or the ring is full; on error the caller
 * retains ownership of the buffer.
 */
static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
{
	struct tun_struct *tun = netdev_priv(dev);
	struct xdp_buff *buff = xdp->data_hard_start;
	int headroom = xdp->data - xdp->data_hard_start;
	struct tun_file *tfile;
	u32 numqueues;
	int ret = 0;

	/* Assure headroom is available and buff is properly aligned:
	 * storing the metadata needs sizeof(*xdp) bytes of headroom, and
	 * the buff pointer must not already have TUN_XDP_FLAG set since
	 * that bit is used below to tag the ring entry.
	 */
	if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
		return -ENOSPC;

	/* Stash the descriptor in the headroom so the consumer can
	 * recover data/data_end after pulling the pointer off the ring.
	 */
	*buff = *xdp;

	rcu_read_lock();

	numqueues = READ_ONCE(tun->numqueues);
	if (!numqueues) {
		ret = -ENOSPC;
		goto out;
	}

	/* Pick a queue by current CPU.
	 * NOTE(review): smp_processor_id() inside rcu_read_lock() assumes
	 * the ndo_xdp_xmit caller runs with preemption/BH disabled —
	 * confirm against the XDP redirect path.
	 */
	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
					    numqueues]);
	/* Encode the XDP flag into lowest bit for consumer to differ
	 * XDP buffer from sk_buff.
	 */
	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
		/* Ring full: count the drop; the caller frees the buffer. */
		this_cpu_inc(tun->pcpu_stats->tx_dropped);
		ret = -ENOSPC;
	}

out:
	rcu_read_unlock();
	return ret;
}
1292+
1293+
/* ndo_xdp_flush handler: wake the reader of the current CPU's queue so
 * it consumes the buffers queued by preceding tun_xdp_xmit() calls.
 * Uses the same smp_processor_id() % numqueues mapping as the xmit path
 * so the same tfile is signalled.
 */
static void tun_xdp_flush(struct net_device *dev)
{
	struct tun_struct *tun = netdev_priv(dev);
	struct tun_file *tfile;
	u32 numqueues;

	rcu_read_lock();

	numqueues = READ_ONCE(tun->numqueues);
	if (!numqueues)
		goto out;

	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
					    numqueues]);
	/* Notify and wake up reader process */
	if (tfile->flags & TUN_FASYNC)
		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
	tfile->socket.sk->sk_data_ready(tfile->socket.sk);

out:
	rcu_read_unlock();
}
1315+
12251316
static const struct net_device_ops tap_netdev_ops = {
12261317
.ndo_uninit = tun_net_uninit,
12271318
.ndo_open = tun_net_open,
@@ -1239,6 +1330,8 @@ static const struct net_device_ops tap_netdev_ops = {
12391330
.ndo_set_rx_headroom = tun_set_headroom,
12401331
.ndo_get_stats64 = tun_net_get_stats64,
12411332
.ndo_bpf = tun_xdp,
1333+
.ndo_xdp_xmit = tun_xdp_xmit,
1334+
.ndo_xdp_flush = tun_xdp_flush,
12421335
};
12431336

12441337
static void tun_flow_init(struct tun_struct *tun)
@@ -1863,6 +1956,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
18631956
return result;
18641957
}
18651958

1959+
/* Copy an XDP buffer's payload to the userspace iov, prefixed with a
 * zeroed virtio_net_hdr when the device is in IFF_VNET_HDR mode (XDP
 * buffers carry no offload metadata, so an all-zero gso header is
 * correct).  Updates per-cpu tx stats.  Returns bytes written, or
 * -EINVAL/-EFAULT if the vnet header cannot be written.
 */
static ssize_t tun_put_user_xdp(struct tun_struct *tun,
				struct tun_file *tfile,
				struct xdp_buff *xdp,
				struct iov_iter *iter)
{
	int vnet_hdr_sz = 0;
	size_t size = xdp->data_end - xdp->data; /* payload length */
	struct tun_pcpu_stats *stats;
	size_t ret;

	if (tun->flags & IFF_VNET_HDR) {
		struct virtio_net_hdr gso = { 0 };

		vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
		if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
			return -EINVAL;
		if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
			     sizeof(gso)))
			return -EFAULT;
		/* Skip any configured padding beyond sizeof(gso). */
		iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
	}

	/* NOTE(review): a short copy_to_iter() here is folded into ret
	 * rather than reported as an error — confirm callers tolerate a
	 * truncated count.
	 */
	ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;

	stats = get_cpu_ptr(tun->pcpu_stats);
	u64_stats_update_begin(&stats->syncp);
	stats->tx_packets++;
	stats->tx_bytes += ret;
	u64_stats_update_end(&stats->syncp);
	put_cpu_ptr(tun->pcpu_stats);

	return ret;
}
1992+
18661993
/* Put packet to the user space buffer */
18671994
static ssize_t tun_put_user(struct tun_struct *tun,
18681995
struct tun_file *tfile,
@@ -1960,15 +2087,14 @@ static ssize_t tun_put_user(struct tun_struct *tun,
19602087
return total;
19612088
}
19622089

1963-
static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
1964-
int *err)
2090+
static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
19652091
{
19662092
DECLARE_WAITQUEUE(wait, current);
1967-
struct sk_buff *skb = NULL;
2093+
void *ptr = NULL;
19682094
int error = 0;
19692095

1970-
skb = ptr_ring_consume(&tfile->tx_ring);
1971-
if (skb)
2096+
ptr = ptr_ring_consume(&tfile->tx_ring);
2097+
if (ptr)
19722098
goto out;
19732099
if (noblock) {
19742100
error = -EAGAIN;
@@ -1979,8 +2105,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
19792105
current->state = TASK_INTERRUPTIBLE;
19802106

19812107
while (1) {
1982-
skb = ptr_ring_consume(&tfile->tx_ring);
1983-
if (skb)
2108+
ptr = ptr_ring_consume(&tfile->tx_ring);
2109+
if (ptr)
19842110
break;
19852111
if (signal_pending(current)) {
19862112
error = -ERESTARTSYS;
@@ -1999,36 +2125,44 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
19992125

20002126
out:
20012127
*err = error;
2002-
return skb;
2128+
return ptr;
20032129
}
20042130

20052131
/* Deliver one queued packet to userspace.
 *
 * @ptr: an entry already pulled off the ring (possibly a tagged
 *       xdp_buff), or NULL to consume one from tfile->tx_ring; this
 *       function takes ownership and frees it on every path.
 *
 * Returns bytes copied, 0 for an empty iov, or a negative errno from
 * tun_ring_recv().
 */
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
			   struct iov_iter *to,
			   int noblock, void *ptr)
{
	ssize_t ret;
	int err;

	tun_debug(KERN_INFO, tun, "tun_do_read\n");

	if (!iov_iter_count(to)) {
		/* Nothing to copy into; drop the entry (handles both
		 * xdp_buff and sk_buff, NULL is a no-op).
		 */
		tun_ptr_free(ptr);
		return 0;
	}

	if (!ptr) {
		/* Read frames from ring */
		ptr = tun_ring_recv(tfile, noblock, &err);
		if (!ptr)
			return err;
	}

	if (tun_is_xdp_buff(ptr)) {
		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);

		ret = tun_put_user_xdp(tun, tfile, xdp, to);
		/* Release the buffer's page regardless of copy outcome. */
		put_page(virt_to_head_page(xdp->data));
	} else {
		struct sk_buff *skb = ptr;

		ret = tun_put_user(tun, tfile, skb, to);
		if (unlikely(ret < 0))
			kfree_skb(skb);
		else
			consume_skb(skb); /* successful delivery, not a drop */
	}

	return ret;
}
@@ -2165,12 +2299,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
21652299
{
21662300
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
21672301
struct tun_struct *tun = tun_get(tfile);
2168-
struct sk_buff *skb = m->msg_control;
2302+
void *ptr = m->msg_control;
21692303
int ret;
21702304

21712305
if (!tun) {
21722306
ret = -EBADFD;
2173-
goto out_free_skb;
2307+
goto out_free;
21742308
}
21752309

21762310
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
@@ -2182,7 +2316,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
21822316
SOL_PACKET, TUN_TX_TIMESTAMP);
21832317
goto out;
21842318
}
2185-
ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
2319+
ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
21862320
if (ret > (ssize_t)total_len) {
21872321
m->msg_flags |= MSG_TRUNC;
21882322
ret = flags & MSG_TRUNC ? ret : total_len;
@@ -2193,12 +2327,25 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
21932327

21942328
out_put_tun:
21952329
tun_put(tun);
2196-
out_free_skb:
2197-
if (skb)
2198-
kfree_skb(skb);
2330+
out_free:
2331+
tun_ptr_free(ptr);
21992332
return ret;
22002333
}
22012334

2335+
static int tun_ptr_peek_len(void *ptr)
2336+
{
2337+
if (likely(ptr)) {
2338+
if (tun_is_xdp_buff(ptr)) {
2339+
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
2340+
2341+
return xdp->data_end - xdp->data;
2342+
}
2343+
return __skb_array_len_with_tag(ptr);
2344+
} else {
2345+
return 0;
2346+
}
2347+
}
2348+
22022349
static int tun_peek_len(struct socket *sock)
22032350
{
22042351
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
@@ -2209,7 +2356,7 @@ static int tun_peek_len(struct socket *sock)
22092356
if (!tun)
22102357
return 0;
22112358

2212-
ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, __skb_array_len_with_tag);
2359+
ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
22132360
tun_put(tun);
22142361

22152362
return ret;
@@ -3132,7 +3279,7 @@ static int tun_queue_resize(struct tun_struct *tun)
31323279

31333280
ret = ptr_ring_resize_multiple(rings, n,
31343281
dev->tx_queue_len, GFP_KERNEL,
3135-
__skb_array_destroy_skb);
3282+
tun_ptr_free);
31363283

31373284
kfree(rings);
31383285
return ret;

drivers/vhost/net.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,17 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
175175
}
176176
}
177177

178+
static int vhost_net_buf_peek_len(void *ptr)
179+
{
180+
if (tun_is_xdp_buff(ptr)) {
181+
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
182+
183+
return xdp->data_end - xdp->data;
184+
}
185+
186+
return __skb_array_len_with_tag(ptr);
187+
}
188+
178189
static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
179190
{
180191
struct vhost_net_buf *rxq = &nvq->rxq;
@@ -186,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
186197
return 0;
187198

188199
out:
189-
return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq));
200+
return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq));
190201
}
191202

192203
static void vhost_net_buf_init(struct vhost_net_buf *rxq)

0 commit comments

Comments
 (0)