@@ -241,6 +241,24 @@ struct tun_struct {
 	struct tun_steering_prog __rcu *steering_prog;
 };
 
+bool tun_is_xdp_buff(void *ptr)
+{
+	return (unsigned long)ptr & TUN_XDP_FLAG;
+}
+EXPORT_SYMBOL(tun_is_xdp_buff);
+
+void *tun_xdp_to_ptr(void *ptr)
+{
+	return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
+}
+EXPORT_SYMBOL(tun_xdp_to_ptr);
+
+void *tun_ptr_to_xdp(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+}
+EXPORT_SYMBOL(tun_ptr_to_xdp);
+
 static int tun_napi_receive(struct napi_struct *napi, int budget)
 {
 	struct tun_file *tfile = container_of(napi, struct tun_file, napi);
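These helpers implement a classic pointer-tagging scheme: every object queued on the tx ring is at least word aligned, so the lowest address bit is always zero and can be borrowed as a type flag (TUN_XDP_FLAG). Below is a minimal standalone userspace sketch of the same idea; TAG_FLAG and the helper names are illustrative, not part of the kernel API.

/* Tag the low bit of an aligned pointer to carry a one-bit type code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TAG_FLAG 0x1UL	/* safe because malloc()ed pointers are word aligned */

static int ptr_is_tagged(void *ptr)
{
	return (uintptr_t)ptr & TAG_FLAG;
}

static void *ptr_tag(void *ptr)
{
	return (void *)((uintptr_t)ptr | TAG_FLAG);
}

static void *ptr_untag(void *ptr)
{
	return (void *)((uintptr_t)ptr & ~TAG_FLAG);
}

int main(void)
{
	int *obj = malloc(sizeof(*obj));
	void *tagged = ptr_tag(obj);

	assert(ptr_is_tagged(tagged));
	assert(!ptr_is_tagged(obj));
	assert(ptr_untag(tagged) == obj);
	printf("tagged=%p untagged=%p\n", tagged, ptr_untag(tagged));
	free(obj);
	return 0;
}
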
@@ -631,12 +649,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
 	return tun;
 }
 
+static void tun_ptr_free(void *ptr)
+{
+	if (!ptr)
+		return;
+	if (tun_is_xdp_buff(ptr)) {
+		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+		put_page(virt_to_head_page(xdp->data));
+	} else {
+		__skb_array_destroy_skb(ptr);
+	}
+}
+
 static void tun_queue_purge(struct tun_file *tfile)
 {
-	struct sk_buff *skb;
+	void *ptr;
 
-	while ((skb = ptr_ring_consume(&tfile->tx_ring)) != NULL)
-		kfree_skb(skb);
+	while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
+		tun_ptr_free(ptr);
 
 	skb_queue_purge(&tfile->sk.sk_write_queue);
 	skb_queue_purge(&tfile->sk.sk_error_queue);
@@ -689,8 +720,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 				unregister_netdevice(tun->dev);
 		}
 		if (tun) {
-			ptr_ring_cleanup(&tfile->tx_ring,
-					 __skb_array_destroy_skb);
+			ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
 			xdp_rxq_info_unreg(&tfile->xdp_rxq);
 		}
 		sock_put(&tfile->sk);
@@ -1222,6 +1252,67 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_get_stats64	= tun_net_get_stats64,
 };
 
+static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct xdp_buff *buff = xdp->data_hard_start;
+	int headroom = xdp->data - xdp->data_hard_start;
+	struct tun_file *tfile;
+	u32 numqueues;
+	int ret = 0;
+
+	/* Ensure headroom is available and buff is properly aligned */
+	if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
+		return -ENOSPC;
+
+	*buff = *xdp;
+
+	rcu_read_lock();
+
+	numqueues = READ_ONCE(tun->numqueues);
+	if (!numqueues) {
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
+					    numqueues]);
+	/* Encode the XDP flag into the lowest bit so the consumer can
+	 * distinguish an XDP buffer from an sk_buff.
+	 */
+	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
+		this_cpu_inc(tun->pcpu_stats->tx_dropped);
+		ret = -ENOSPC;
+	}
+
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static void tun_xdp_flush(struct net_device *dev)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile;
+	u32 numqueues;
+
+	rcu_read_lock();
+
+	numqueues = READ_ONCE(tun->numqueues);
+	if (!numqueues)
+		goto out;
+
+	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
+					    numqueues]);
+	/* Notify and wake up reader process */
+	if (tfile->flags & TUN_FASYNC)
+		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+
+out:
+	rcu_read_unlock();
+}
+
 static const struct net_device_ops tap_netdev_ops = {
 	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
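In tun_xdp_xmit() above, a copy of the caller's struct xdp_buff is stashed into the headroom of the packet buffer itself (buff = xdp->data_hard_start; *buff = *xdp), so the descriptor outlives the caller's stack frame and only the tagged pointer has to be queued on the per-queue ptr_ring. A minimal userspace sketch of that headroom-stashing trick follows; the desc struct, HEADROOM value, and buffer layout are illustrative assumptions, not the kernel's definitions.

/* Stash a small descriptor in a buffer's own headroom, then recover it. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct desc {			/* stand-in for struct xdp_buff */
	void *data;		/* start of the packet payload */
	void *data_end;		/* one past the last payload byte */
	void *data_hard_start;	/* start of the whole buffer, incl. headroom */
};

#define HEADROOM 64		/* must be >= sizeof(struct desc) */

int main(void)
{
	char *buf = malloc(HEADROOM + 1500);
	struct desc d = {
		.data		 = buf + HEADROOM,
		.data_end	 = buf + HEADROOM + 8,
		.data_hard_start = buf,
	};
	struct desc *stashed;

	memcpy(d.data, "payload", 8);

	/* Producer: copy the on-stack descriptor into the headroom; only
	 * a (tagged) pointer to this copy needs to be queued.
	 */
	assert(HEADROOM >= sizeof(d));
	stashed = d.data_hard_start;
	*stashed = d;

	/* Consumer: recover the payload bounds purely from the stashed copy. */
	printf("len=%ld data=%s\n",
	       (long)((char *)stashed->data_end - (char *)stashed->data),
	       (char *)stashed->data);
	free(buf);
	return 0;
}
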
@@ -1239,6 +1330,8 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_set_rx_headroom	= tun_set_headroom,
 	.ndo_get_stats64	= tun_net_get_stats64,
 	.ndo_bpf		= tun_xdp,
+	.ndo_xdp_xmit		= tun_xdp_xmit,
+	.ndo_xdp_flush		= tun_xdp_flush,
 };
 
 static void tun_flow_init(struct tun_struct *tun)
@@ -1863,6 +1956,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	return result;
 }
 
+static ssize_t tun_put_user_xdp(struct tun_struct *tun,
+				struct tun_file *tfile,
+				struct xdp_buff *xdp,
+				struct iov_iter *iter)
+{
+	int vnet_hdr_sz = 0;
+	size_t size = xdp->data_end - xdp->data;
+	struct tun_pcpu_stats *stats;
+	size_t ret;
+
+	if (tun->flags & IFF_VNET_HDR) {
+		struct virtio_net_hdr gso = { 0 };
+
+		vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
+		if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
+			return -EINVAL;
+		if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
+			     sizeof(gso)))
+			return -EFAULT;
+		iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+	}
+
+	ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+
+	stats = get_cpu_ptr(tun->pcpu_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->tx_packets++;
+	stats->tx_bytes += ret;
+	u64_stats_update_end(&stats->syncp);
+	put_cpu_ptr(tun->pcpu_stats);
+
+	return ret;
+}
+
 /* Put packet to the user space buffer */
 static ssize_t tun_put_user(struct tun_struct *tun,
 			    struct tun_file *tfile,
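tun_put_user_xdp() above prepends an all-zero struct virtio_net_hdr when the device was configured with IFF_VNET_HDR: the XDP transmit path carries no GSO or checksum metadata, so a zeroed header simply means "no offloads", and the returned byte count includes the configured vnet header size. The sketch below shows what a userspace reader of the tun fd might do with such a frame; the fd setup is omitted, and using sizeof(struct virtio_net_hdr) as the header length is an assumption (the driver honours the size configured via TUNSETVNETHDRSZ).

/* Read one frame from a tun fd opened with IFF_VNET_HDR: zeroed header,
 * then the raw packet bytes.
 */
#include <linux/virtio_net.h>
#include <stdio.h>
#include <sys/uio.h>

ssize_t read_one_frame(int tun_fd, void *pkt, size_t pkt_len)
{
	struct virtio_net_hdr hdr;
	struct iovec iov[2] = {
		{ .iov_base = &hdr, .iov_len = sizeof(hdr) },
		{ .iov_base = pkt,  .iov_len = pkt_len },
	};
	ssize_t n = readv(tun_fd, iov, 2);

	if (n < (ssize_t)sizeof(hdr))
		return -1;
	/* Frames queued via tun_xdp_xmit() carry no offload metadata,
	 * so every header field reads back as zero.
	 */
	printf("gso_type=%u flags=%u payload=%zd bytes\n",
	       (unsigned)hdr.gso_type, (unsigned)hdr.flags,
	       n - (ssize_t)sizeof(hdr));
	return n - (ssize_t)sizeof(hdr);
}
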
@@ -1960,15 +2087,14 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 	return total;
 }
 
-static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
-				     int *err)
+static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct sk_buff *skb = NULL;
+	void *ptr = NULL;
 	int error = 0;
 
-	skb = ptr_ring_consume(&tfile->tx_ring);
-	if (skb)
+	ptr = ptr_ring_consume(&tfile->tx_ring);
+	if (ptr)
 		goto out;
 	if (noblock) {
 		error = -EAGAIN;
@@ -1979,8 +2105,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 	current->state = TASK_INTERRUPTIBLE;
 
 	while (1) {
-		skb = ptr_ring_consume(&tfile->tx_ring);
-		if (skb)
+		ptr = ptr_ring_consume(&tfile->tx_ring);
+		if (ptr)
 			break;
 		if (signal_pending(current)) {
 			error = -ERESTARTSYS;
@@ -1999,36 +2125,44 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 
 out:
 	*err = error;
-	return skb;
+	return ptr;
 }
 
 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			   struct iov_iter *to,
-			   int noblock, struct sk_buff *skb)
+			   int noblock, void *ptr)
 {
 	ssize_t ret;
 	int err;
 
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
 	if (!iov_iter_count(to)) {
-		if (skb)
-			kfree_skb(skb);
+		tun_ptr_free(ptr);
 		return 0;
 	}
 
-	if (!skb) {
+	if (!ptr) {
 		/* Read frames from ring */
-		skb = tun_ring_recv(tfile, noblock, &err);
-		if (!skb)
+		ptr = tun_ring_recv(tfile, noblock, &err);
+		if (!ptr)
 			return err;
 	}
 
-	ret = tun_put_user(tun, tfile, skb, to);
-	if (unlikely(ret < 0))
-		kfree_skb(skb);
-	else
-		consume_skb(skb);
+	if (tun_is_xdp_buff(ptr)) {
+		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+		ret = tun_put_user_xdp(tun, tfile, xdp, to);
+		put_page(virt_to_head_page(xdp->data));
+	} else {
+		struct sk_buff *skb = ptr;
+
+		ret = tun_put_user(tun, tfile, skb, to);
+		if (unlikely(ret < 0))
+			kfree_skb(skb);
+		else
+			consume_skb(skb);
+	}
 
 	return ret;
 }
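With the changes above, tun_do_read() consumes an opaque pointer and dispatches on the tag bit: tagged entries are treated as xdp_buff metadata and the payload page is released with put_page(), while untagged entries follow the existing sk_buff path. The following userspace sketch mirrors that consume-and-dispatch pattern; the toy fixed-size ring and the frame/meta types are stand-ins for the ptr_ring, sk_buff, and xdp_buff, not kernel code.

/* Pop tagged/untagged pointers from a toy ring and take type-specific paths. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TAG 0x1UL

struct frame { int len; };	/* plays the role of sk_buff */
struct meta  { int len; };	/* plays the role of xdp_buff */

static void *ring[16];
static unsigned int head, tail;

static void produce(void *ptr) { ring[tail++ % 16] = ptr; }
static void *consume(void)     { return head == tail ? NULL : ring[head++ % 16]; }

int main(void)
{
	struct frame *f = malloc(sizeof(*f));
	struct meta *m = malloc(sizeof(*m));
	void *ptr;

	f->len = 100;
	m->len = 60;
	produce(f);				/* untagged: "skb" path */
	produce((void *)((uintptr_t)m | TAG));	/* tagged:   "xdp" path */

	while ((ptr = consume()) != NULL) {
		if ((uintptr_t)ptr & TAG) {
			struct meta *x = (void *)((uintptr_t)ptr & ~TAG);

			printf("xdp-style entry, len %d\n", x->len);
			free(x);
		} else {
			struct frame *skb = ptr;

			printf("skb-style entry, len %d\n", skb->len);
			free(skb);
		}
	}
	return 0;
}
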
@@ -2165,12 +2299,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
-	struct sk_buff *skb = m->msg_control;
+	void *ptr = m->msg_control;
 	int ret;
 
 	if (!tun) {
 		ret = -EBADFD;
-		goto out_free_skb;
+		goto out_free;
 	}
 
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
@@ -2182,7 +2316,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 					 SOL_PACKET, TUN_TX_TIMESTAMP);
 		goto out;
 	}
-	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
 	if (ret > (ssize_t)total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
@@ -2193,12 +2327,25 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 
 out_put_tun:
 	tun_put(tun);
-out_free_skb:
-	if (skb)
-		kfree_skb(skb);
+out_free:
+	tun_ptr_free(ptr);
 	return ret;
 }
 
+static int tun_ptr_peek_len(void *ptr)
+{
+	if (likely(ptr)) {
+		if (tun_is_xdp_buff(ptr)) {
+			struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+			return xdp->data_end - xdp->data;
+		}
+		return __skb_array_len_with_tag(ptr);
+	} else {
+		return 0;
+	}
+}
+
 static int tun_peek_len(struct socket *sock)
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
@@ -2209,7 +2356,7 @@ static int tun_peek_len(struct socket *sock)
 	if (!tun)
 		return 0;
 
-	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, __skb_array_len_with_tag);
+	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
 	tun_put(tun);
 
 	return ret;
@@ -3132,7 +3279,7 @@ static int tun_queue_resize(struct tun_struct *tun)
 
 	ret = ptr_ring_resize_multiple(rings, n,
 				       dev->tx_queue_len, GFP_KERNEL,
-				       __skb_array_destroy_skb);
+				       tun_ptr_free);
 
 	kfree(rings);
 	return ret;