@@ -195,6 +195,11 @@ struct tun_flow_entry {
195
195
196
196
#define TUN_NUM_FLOW_ENTRIES 1024
197
197
198
+ struct tun_steering_prog {
199
+ struct rcu_head rcu ;
200
+ struct bpf_prog * prog ;
201
+ };
202
+
198
203
/* Since the socket was moved to tun_file, to preserve the behavior of persist
199
204
* device, socket filter, sndbuf and vnet header size are restored when the
200
205
* file is attached to a persist device.
@@ -232,6 +237,7 @@ struct tun_struct {
232
237
u32 rx_batched ;
233
238
struct tun_pcpu_stats __percpu * pcpu_stats ;
234
239
struct bpf_prog __rcu * xdp_prog ;
240
+ struct tun_steering_prog __rcu * steering_prog ;
235
241
};
236
242
237
243
static int tun_napi_receive (struct napi_struct * napi , int budget )
@@ -537,15 +543,12 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
537
543
* different rxq no. here. If we could not get rxhash, then we would
538
544
* hope the rxq no. may help here.
539
545
*/
540
- static u16 tun_select_queue (struct net_device * dev , struct sk_buff * skb ,
541
- void * accel_priv , select_queue_fallback_t fallback )
546
+ static u16 tun_automq_select_queue (struct tun_struct * tun , struct sk_buff * skb )
542
547
{
543
- struct tun_struct * tun = netdev_priv (dev );
544
548
struct tun_flow_entry * e ;
545
549
u32 txq = 0 ;
546
550
u32 numqueues = 0 ;
547
551
548
- rcu_read_lock ();
549
552
numqueues = READ_ONCE (tun -> numqueues );
550
553
551
554
txq = __skb_get_hash_symmetric (skb );
@@ -563,10 +566,37 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
563
566
txq -= numqueues ;
564
567
}
565
568
566
- rcu_read_unlock ();
567
569
return txq ;
568
570
}
569
571
572
+ static u16 tun_ebpf_select_queue (struct tun_struct * tun , struct sk_buff * skb )
573
+ {
574
+ struct tun_steering_prog * prog ;
575
+ u16 ret = 0 ;
576
+
577
+ prog = rcu_dereference (tun -> steering_prog );
578
+ if (prog )
579
+ ret = bpf_prog_run_clear_cb (prog -> prog , skb );
580
+
581
+ return ret % tun -> numqueues ;
582
+ }
583
+
584
+ static u16 tun_select_queue (struct net_device * dev , struct sk_buff * skb ,
585
+ void * accel_priv , select_queue_fallback_t fallback )
586
+ {
587
+ struct tun_struct * tun = netdev_priv (dev );
588
+ u16 ret ;
589
+
590
+ rcu_read_lock ();
591
+ if (rcu_dereference (tun -> steering_prog ))
592
+ ret = tun_ebpf_select_queue (tun , skb );
593
+ else
594
+ ret = tun_automq_select_queue (tun , skb );
595
+ rcu_read_unlock ();
596
+
597
+ return ret ;
598
+ }
599
+
570
600
static inline bool tun_not_capable (struct tun_struct * tun )
571
601
{
572
602
const struct cred * cred = current_cred ();
@@ -933,23 +963,10 @@ static int tun_net_close(struct net_device *dev)
933
963
}
934
964
935
965
/* Net device start xmit */
936
- static netdev_tx_t tun_net_xmit (struct sk_buff * skb , struct net_device * dev )
966
+ static void tun_automq_xmit (struct tun_struct * tun , struct sk_buff * skb )
937
967
{
938
- struct tun_struct * tun = netdev_priv (dev );
939
- int txq = skb -> queue_mapping ;
940
- struct tun_file * tfile ;
941
- u32 numqueues = 0 ;
942
-
943
- rcu_read_lock ();
944
- tfile = rcu_dereference (tun -> tfiles [txq ]);
945
- numqueues = READ_ONCE (tun -> numqueues );
946
-
947
- /* Drop packet if interface is not attached */
948
- if (txq >= numqueues )
949
- goto drop ;
950
-
951
968
#ifdef CONFIG_RPS
952
- if (numqueues == 1 && static_key_false (& rps_needed )) {
969
+ if (tun -> numqueues == 1 && static_key_false (& rps_needed )) {
953
970
/* Select queue was not called for the skbuff, so we extract the
954
971
* RPS hash and save it into the flow_table here.
955
972
*/
@@ -965,6 +982,26 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
965
982
}
966
983
}
967
984
#endif
985
+ }
986
+
987
+ /* Net device start xmit */
988
+ static netdev_tx_t tun_net_xmit (struct sk_buff * skb , struct net_device * dev )
989
+ {
990
+ struct tun_struct * tun = netdev_priv (dev );
991
+ int txq = skb -> queue_mapping ;
992
+ struct tun_file * tfile ;
993
+ u32 numqueues = 0 ;
994
+
995
+ rcu_read_lock ();
996
+ tfile = rcu_dereference (tun -> tfiles [txq ]);
997
+ numqueues = READ_ONCE (tun -> numqueues );
998
+
999
+ /* Drop packet if interface is not attached */
1000
+ if (txq >= numqueues )
1001
+ goto drop ;
1002
+
1003
+ if (!rcu_dereference (tun -> steering_prog ))
1004
+ tun_automq_xmit (tun , skb );
968
1005
969
1006
tun_debug (KERN_INFO , tun , "tun_net_xmit %d\n" , skb -> len );
970
1007
@@ -1547,7 +1584,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1547
1584
int copylen ;
1548
1585
bool zerocopy = false;
1549
1586
int err ;
1550
- u32 rxhash ;
1587
+ u32 rxhash = 0 ;
1551
1588
int skb_xdp = 1 ;
1552
1589
bool frags = tun_napi_frags_enabled (tun );
1553
1590
@@ -1735,7 +1772,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1735
1772
rcu_read_unlock ();
1736
1773
}
1737
1774
1738
- rxhash = __skb_get_hash_symmetric (skb );
1775
+ rcu_read_lock ();
1776
+ if (!rcu_dereference (tun -> steering_prog ))
1777
+ rxhash = __skb_get_hash_symmetric (skb );
1778
+ rcu_read_unlock ();
1739
1779
1740
1780
if (frags ) {
1741
1781
/* Exercise flow dissector code path. */
@@ -1779,7 +1819,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1779
1819
u64_stats_update_end (& stats -> syncp );
1780
1820
put_cpu_ptr (stats );
1781
1821
1782
- tun_flow_update (tun , rxhash , tfile );
1822
+ if (rxhash )
1823
+ tun_flow_update (tun , rxhash , tfile );
1824
+
1783
1825
return total_len ;
1784
1826
}
1785
1827
@@ -1987,6 +2029,36 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
1987
2029
return ret ;
1988
2030
}
1989
2031
2032
+ static void tun_steering_prog_free (struct rcu_head * rcu )
2033
+ {
2034
+ struct tun_steering_prog * prog = container_of (rcu ,
2035
+ struct tun_steering_prog , rcu );
2036
+
2037
+ bpf_prog_destroy (prog -> prog );
2038
+ kfree (prog );
2039
+ }
2040
+
2041
+ static int __tun_set_steering_ebpf (struct tun_struct * tun ,
2042
+ struct bpf_prog * prog )
2043
+ {
2044
+ struct tun_steering_prog * old , * new = NULL ;
2045
+
2046
+ if (prog ) {
2047
+ new = kmalloc (sizeof (* new ), GFP_KERNEL );
2048
+ if (!new )
2049
+ return - ENOMEM ;
2050
+ new -> prog = prog ;
2051
+ }
2052
+
2053
+ old = rtnl_dereference (tun -> steering_prog );
2054
+ rcu_assign_pointer (tun -> steering_prog , new );
2055
+
2056
+ if (old )
2057
+ call_rcu (& old -> rcu , tun_steering_prog_free );
2058
+
2059
+ return 0 ;
2060
+ }
2061
+
1990
2062
static void tun_free_netdev (struct net_device * dev )
1991
2063
{
1992
2064
struct tun_struct * tun = netdev_priv (dev );
@@ -1995,6 +2067,9 @@ static void tun_free_netdev(struct net_device *dev)
1995
2067
free_percpu (tun -> pcpu_stats );
1996
2068
tun_flow_uninit (tun );
1997
2069
security_tun_dev_free_security (tun -> security );
2070
+ rtnl_lock ();
2071
+ __tun_set_steering_ebpf (tun , NULL );
2072
+ rtnl_unlock ();
1998
2073
}
1999
2074
2000
2075
static void tun_setup (struct net_device * dev )
@@ -2283,6 +2358,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
2283
2358
tun -> filter_attached = false;
2284
2359
tun -> sndbuf = tfile -> socket .sk -> sk_sndbuf ;
2285
2360
tun -> rx_batched = 0 ;
2361
+ RCU_INIT_POINTER (tun -> steering_prog , NULL );
2286
2362
2287
2363
tun -> pcpu_stats = netdev_alloc_pcpu_stats (struct tun_pcpu_stats );
2288
2364
if (!tun -> pcpu_stats ) {
@@ -2475,6 +2551,25 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
2475
2551
return ret ;
2476
2552
}
2477
2553
2554
+ static int tun_set_steering_ebpf (struct tun_struct * tun , void __user * data )
2555
+ {
2556
+ struct bpf_prog * prog ;
2557
+ int fd ;
2558
+
2559
+ if (copy_from_user (& fd , data , sizeof (fd )))
2560
+ return - EFAULT ;
2561
+
2562
+ if (fd == -1 ) {
2563
+ prog = NULL ;
2564
+ } else {
2565
+ prog = bpf_prog_get_type (fd , BPF_PROG_TYPE_SOCKET_FILTER );
2566
+ if (IS_ERR (prog ))
2567
+ return PTR_ERR (prog );
2568
+ }
2569
+
2570
+ return __tun_set_steering_ebpf (tun , prog );
2571
+ }
2572
+
2478
2573
static long __tun_chr_ioctl (struct file * file , unsigned int cmd ,
2479
2574
unsigned long arg , int ifreq_len )
2480
2575
{
@@ -2751,6 +2846,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
2751
2846
ret = 0 ;
2752
2847
break ;
2753
2848
2849
+ case TUNSETSTEERINGEBPF :
2850
+ ret = tun_set_steering_ebpf (tun , argp );
2851
+ break ;
2852
+
2754
2853
default :
2755
2854
ret = - EINVAL ;
2756
2855
break ;
0 commit comments