Skip to content

Commit 96f8406

Browse files
jasowang authored and davem330 committed
tun: add eBPF based queue selection method
This patch introduces an eBPF-based queue selection method. With it, the queue selection policy can be offloaded to userspace completely through a new ioctl, TUNSETSTEERINGEBPF. Signed-off-by: Jason Wang <[email protected]> Acked-by: Willem de Bruijn <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent f520957 commit 96f8406

File tree

2 files changed

+123
-23
lines changed

2 files changed

+123
-23
lines changed

drivers/net/tun.c

Lines changed: 122 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,11 @@ struct tun_flow_entry {
195195

196196
#define TUN_NUM_FLOW_ENTRIES 1024
197197

198+
/*
 * RCU-managed holder for the eBPF program used to steer packets to a queue.
 * tun_struct::steering_prog points at one of these.
 */
struct tun_steering_prog {
199+
/* Handed to call_rcu() so the holder is freed by tun_steering_prog_free(). */
struct rcu_head rcu;
200+
/* Program run by tun_ebpf_select_queue(); released with bpf_prog_destroy(). */
struct bpf_prog *prog;
201+
};
202+
198203
/* Since the socket were moved to tun_file, to preserve the behavior of persist
199204
* device, socket filter, sndbuf and vnet header size were restore when the
200205
* file were attached to a persist device.
@@ -232,6 +237,7 @@ struct tun_struct {
232237
u32 rx_batched;
233238
struct tun_pcpu_stats __percpu *pcpu_stats;
234239
struct bpf_prog __rcu *xdp_prog;
240+
struct tun_steering_prog __rcu *steering_prog;
235241
};
236242

237243
static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -537,15 +543,12 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
537543
* different rxq no. here. If we could not get rxhash, then we would
538544
* hope the rxq no. may help here.
539545
*/
540-
static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
541-
void *accel_priv, select_queue_fallback_t fallback)
546+
static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
542547
{
543-
struct tun_struct *tun = netdev_priv(dev);
544548
struct tun_flow_entry *e;
545549
u32 txq = 0;
546550
u32 numqueues = 0;
547551

548-
rcu_read_lock();
549552
numqueues = READ_ONCE(tun->numqueues);
550553

551554
txq = __skb_get_hash_symmetric(skb);
@@ -563,10 +566,37 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
563566
txq -= numqueues;
564567
}
565568

566-
rcu_read_unlock();
567569
return txq;
568570
}
569571

572+
static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
573+
{
574+
struct tun_steering_prog *prog;
575+
u16 ret = 0;
576+
577+
prog = rcu_dereference(tun->steering_prog);
578+
if (prog)
579+
ret = bpf_prog_run_clear_cb(prog->prog, skb);
580+
581+
return ret % tun->numqueues;
582+
}
583+
584+
/*
 * ndo_select_queue hook: choose the transmit queue for @skb.
 *
 * When an eBPF steering program is attached the decision is delegated to
 * it; otherwise the automatic flow-based selection is used.  Both helpers
 * run under the RCU read lock taken here.  @accel_priv and @fallback are
 * not used.
 */
static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
			    void *accel_priv, select_queue_fallback_t fallback)
{
	struct tun_struct *tun = netdev_priv(dev);
	u16 queue;

	rcu_read_lock();
	queue = rcu_dereference(tun->steering_prog) ?
		tun_ebpf_select_queue(tun, skb) :
		tun_automq_select_queue(tun, skb);
	rcu_read_unlock();

	return queue;
}
599+
570600
static inline bool tun_not_capable(struct tun_struct *tun)
571601
{
572602
const struct cred *cred = current_cred();
@@ -933,23 +963,10 @@ static int tun_net_close(struct net_device *dev)
933963
}
934964

935965
/* Net device start xmit */
936-
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
966+
static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
937967
{
938-
struct tun_struct *tun = netdev_priv(dev);
939-
int txq = skb->queue_mapping;
940-
struct tun_file *tfile;
941-
u32 numqueues = 0;
942-
943-
rcu_read_lock();
944-
tfile = rcu_dereference(tun->tfiles[txq]);
945-
numqueues = READ_ONCE(tun->numqueues);
946-
947-
/* Drop packet if interface is not attached */
948-
if (txq >= numqueues)
949-
goto drop;
950-
951968
#ifdef CONFIG_RPS
952-
if (numqueues == 1 && static_key_false(&rps_needed)) {
969+
if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
953970
/* Select queue was not called for the skbuff, so we extract the
954971
* RPS hash and save it into the flow_table here.
955972
*/
@@ -965,6 +982,26 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
965982
}
966983
}
967984
#endif
985+
}
986+
987+
/* Net device start xmit */
988+
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
989+
{
990+
struct tun_struct *tun = netdev_priv(dev);
991+
int txq = skb->queue_mapping;
992+
struct tun_file *tfile;
993+
u32 numqueues = 0;
994+
995+
rcu_read_lock();
996+
tfile = rcu_dereference(tun->tfiles[txq]);
997+
numqueues = READ_ONCE(tun->numqueues);
998+
999+
/* Drop packet if interface is not attached */
1000+
if (txq >= numqueues)
1001+
goto drop;
1002+
1003+
if (!rcu_dereference(tun->steering_prog))
1004+
tun_automq_xmit(tun, skb);
9681005

9691006
tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
9701007

@@ -1547,7 +1584,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
15471584
int copylen;
15481585
bool zerocopy = false;
15491586
int err;
1550-
u32 rxhash;
1587+
u32 rxhash = 0;
15511588
int skb_xdp = 1;
15521589
bool frags = tun_napi_frags_enabled(tun);
15531590

@@ -1735,7 +1772,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
17351772
rcu_read_unlock();
17361773
}
17371774

1738-
rxhash = __skb_get_hash_symmetric(skb);
1775+
rcu_read_lock();
1776+
if (!rcu_dereference(tun->steering_prog))
1777+
rxhash = __skb_get_hash_symmetric(skb);
1778+
rcu_read_unlock();
17391779

17401780
if (frags) {
17411781
/* Exercise flow dissector code path. */
@@ -1779,7 +1819,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
17791819
u64_stats_update_end(&stats->syncp);
17801820
put_cpu_ptr(stats);
17811821

1782-
tun_flow_update(tun, rxhash, tfile);
1822+
if (rxhash)
1823+
tun_flow_update(tun, rxhash, tfile);
1824+
17831825
return total_len;
17841826
}
17851827

@@ -1987,6 +2029,36 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
19872029
return ret;
19882030
}
19892031

2032+
static void tun_steering_prog_free(struct rcu_head *rcu)
2033+
{
2034+
struct tun_steering_prog *prog = container_of(rcu,
2035+
struct tun_steering_prog, rcu);
2036+
2037+
bpf_prog_destroy(prog->prog);
2038+
kfree(prog);
2039+
}
2040+
2041+
static int __tun_set_steering_ebpf(struct tun_struct *tun,
2042+
struct bpf_prog *prog)
2043+
{
2044+
struct tun_steering_prog *old, *new = NULL;
2045+
2046+
if (prog) {
2047+
new = kmalloc(sizeof(*new), GFP_KERNEL);
2048+
if (!new)
2049+
return -ENOMEM;
2050+
new->prog = prog;
2051+
}
2052+
2053+
old = rtnl_dereference(tun->steering_prog);
2054+
rcu_assign_pointer(tun->steering_prog, new);
2055+
2056+
if (old)
2057+
call_rcu(&old->rcu, tun_steering_prog_free);
2058+
2059+
return 0;
2060+
}
2061+
19902062
static void tun_free_netdev(struct net_device *dev)
19912063
{
19922064
struct tun_struct *tun = netdev_priv(dev);
@@ -1995,6 +2067,9 @@ static void tun_free_netdev(struct net_device *dev)
19952067
free_percpu(tun->pcpu_stats);
19962068
tun_flow_uninit(tun);
19972069
security_tun_dev_free_security(tun->security);
2070+
rtnl_lock();
2071+
__tun_set_steering_ebpf(tun, NULL);
2072+
rtnl_unlock();
19982073
}
19992074

20002075
static void tun_setup(struct net_device *dev)
@@ -2283,6 +2358,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
22832358
tun->filter_attached = false;
22842359
tun->sndbuf = tfile->socket.sk->sk_sndbuf;
22852360
tun->rx_batched = 0;
2361+
RCU_INIT_POINTER(tun->steering_prog, NULL);
22862362

22872363
tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
22882364
if (!tun->pcpu_stats) {
@@ -2475,6 +2551,25 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
24752551
return ret;
24762552
}
24772553

2554+
/*
 * TUNSETSTEERINGEBPF handler: attach or detach the eBPF steering program.
 *
 * @tun:  device to configure
 * @data: userspace pointer to an int holding a BPF program fd, or -1 to
 *        detach the current program
 *
 * Returns 0 on success, -EFAULT if the fd cannot be copied from userspace,
 * the error from bpf_prog_get_type() if the fd is not a usable socket-filter
 * program, or the error from __tun_set_steering_ebpf().
 */
static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
{
	struct bpf_prog *prog = NULL;
	int fd;
	int err;

	if (copy_from_user(&fd, data, sizeof(fd)))
		return -EFAULT;

	/* fd == -1 requests detaching, which is expressed as a NULL program. */
	if (fd != -1) {
		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
		if (IS_ERR(prog))
			return PTR_ERR(prog);
	}

	err = __tun_set_steering_ebpf(tun, prog);
	/*
	 * On failure the program was not installed, so nothing else will
	 * ever release the reference obtained above; drop it here.
	 */
	if (err && prog)
		bpf_prog_destroy(prog);

	return err;
}
2572+
24782573
static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
24792574
unsigned long arg, int ifreq_len)
24802575
{
@@ -2751,6 +2846,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
27512846
ret = 0;
27522847
break;
27532848

2849+
case TUNSETSTEERINGEBPF:
2850+
ret = tun_set_steering_ebpf(tun, argp);
2851+
break;
2852+
27542853
default:
27552854
ret = -EINVAL;
27562855
break;

include/uapi/linux/if_tun.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
*/
5858
#define TUNSETVNETBE _IOW('T', 222, int)
5959
#define TUNGETVNETBE _IOR('T', 223, int)
60+
/*
 * Attach an eBPF queue-steering program to the device.  The argument points
 * to an int holding a BPF program fd; -1 detaches the current program.
 * NOTE(review): encoded with _IOR although the kernel copies the int from
 * userspace; presumably the value must stay as is once exposed as ABI - confirm.
 */
#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
6061

6162
/* TUNSETIFF ifr flags */
6263
#define IFF_TUN 0x0001

0 commit comments

Comments
 (0)