Skip to content

Commit d139600

Browse files
Toshiaki Makita authored and borkmann committed
veth: Add XDP TX and REDIRECT
This allows further redirection of xdp_frames like

  NIC   -> veth--veth -> veth--veth
  (XDP)          (XDP)         (XDP)

The intermediate XDP, redirecting packets from NIC to the other veth, reuses xdp_mem_info from NIC so that page recycling of the NIC works on the destination veth's XDP.

In this way, return_frame is not fully guarded by NAPI, since another NAPI handler on another cpu may use the same xdp_mem_info concurrently. Thus, disable napi_direct by xdp_set_return_frame_no_direct() during the NAPI context.

v8:
- Don't use xdp_frame pointer address for data_hard_start of xdp_buff.

v4:
- Use xdp_[set|clear]_return_frame_no_direct() instead of a flag in xdp_mem_info.

v3:
- Fix double free when veth_xdp_tx() returns a positive value.
- Convert xdp_xmit and xdp_redir variables into flags.

Signed-off-by: Toshiaki Makita <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 2539650 commit d139600

File tree

1 file changed

+110
-9
lines changed

1 file changed

+110
-9
lines changed

drivers/net/veth.c

Lines changed: 110 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,10 @@
3232
#define VETH_RING_SIZE 256
3333
#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
3434

35+
/* Separating two types of XDP xmit */
36+
#define VETH_XDP_TX BIT(0)
37+
#define VETH_XDP_REDIR BIT(1)
38+
3539
struct pcpu_vstats {
3640
u64 packets;
3741
u64 bytes;
@@ -45,6 +49,7 @@ struct veth_priv {
4549
struct bpf_prog *_xdp_prog;
4650
struct net_device __rcu *peer;
4751
atomic64_t dropped;
52+
struct xdp_mem_info xdp_mem;
4853
unsigned requested_headroom;
4954
bool rx_notify_masked;
5055
struct ptr_ring xdp_ring;
@@ -317,12 +322,44 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
317322
return n - drops;
318323
}
319324

325+
/*
 * veth_xdp_flush - invoke __veth_xdp_flush() on the peer of @dev.
 * @dev: veth device whose peer should be flushed.
 *
 * Under rcu_read_lock(), looks up @dev's peer. If the peer exists and has an
 * XDP program attached, calls __veth_xdp_flush() on the peer's private data;
 * otherwise does nothing. veth_poll() calls this once per poll when at least
 * one frame took the XDP_TX path (VETH_XDP_TX flag set).
 *
 * NOTE(review): __veth_xdp_flush() is defined outside this view; presumably
 * it notifies the peer that frames are pending in its xdp_ring - confirm.
 */
static void veth_xdp_flush(struct net_device *dev)
326+
{
327+
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
328+
struct net_device *rcv;
329+
330+
rcu_read_lock();
331+
rcv = rcu_dereference(priv->peer);
332+
/* No peer: nothing to flush. */
if (unlikely(!rcv))
333+
goto out;
334+
335+
rcv_priv = netdev_priv(rcv);
336+
/* xdp_ring is initialized on receive side?
 * The peer's xdp_prog pointer is used as the indicator; presumably the
 * ring is only set up while a program is attached - TODO confirm.
 */
337+
if (unlikely(!rcu_access_pointer(rcv_priv->xdp_prog)))
338+
goto out;
339+
340+
__veth_xdp_flush(rcv_priv);
341+
out:
342+
rcu_read_unlock();
343+
}
344+
345+
/*
 * veth_xdp_tx - transmit a single xdp_buff from @dev via veth_xdp_xmit().
 * @dev: veth device to transmit from.
 * @xdp: buffer to send; converted to an xdp_frame first.
 *
 * Returns -EOVERFLOW if convert_to_xdp_frame() fails, otherwise the return
 * value of veth_xdp_xmit(dev, 1, &frame, 0). The callers visible in this
 * file treat a negative return as failure and release the buffer themselves.
 */
static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
346+
{
347+
struct xdp_frame *frame = convert_to_xdp_frame(xdp);
348+
349+
/* Conversion failed: report an error without calling veth_xdp_xmit(). */
if (unlikely(!frame))
350+
return -EOVERFLOW;
351+
352+
/* Send exactly one frame with flags == 0. */
return veth_xdp_xmit(dev, 1, &frame, 0);
353+
}
354+
320355
static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
321-
struct xdp_frame *frame)
356+
struct xdp_frame *frame,
357+
unsigned int *xdp_xmit)
322358
{
323359
void *hard_start = frame->data - frame->headroom;
324360
void *head = hard_start - sizeof(struct xdp_frame);
325361
int len = frame->len, delta = 0;
362+
struct xdp_frame orig_frame;
326363
struct bpf_prog *xdp_prog;
327364
unsigned int headroom;
328365
struct sk_buff *skb;
@@ -346,6 +383,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
346383
delta = frame->data - xdp.data;
347384
len = xdp.data_end - xdp.data;
348385
break;
386+
case XDP_TX:
387+
orig_frame = *frame;
388+
xdp.data_hard_start = head;
389+
xdp.rxq->mem = frame->mem;
390+
if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
391+
trace_xdp_exception(priv->dev, xdp_prog, act);
392+
frame = &orig_frame;
393+
goto err_xdp;
394+
}
395+
*xdp_xmit |= VETH_XDP_TX;
396+
rcu_read_unlock();
397+
goto xdp_xmit;
398+
case XDP_REDIRECT:
399+
orig_frame = *frame;
400+
xdp.data_hard_start = head;
401+
xdp.rxq->mem = frame->mem;
402+
if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) {
403+
frame = &orig_frame;
404+
goto err_xdp;
405+
}
406+
*xdp_xmit |= VETH_XDP_REDIR;
407+
rcu_read_unlock();
408+
goto xdp_xmit;
349409
default:
350410
bpf_warn_invalid_xdp_action(act);
351411
case XDP_ABORTED:
@@ -370,12 +430,13 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
370430
err_xdp:
371431
rcu_read_unlock();
372432
xdp_return_frame(frame);
373-
433+
xdp_xmit:
374434
return NULL;
375435
}
376436

377437
static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
378-
struct sk_buff *skb)
438+
struct sk_buff *skb,
439+
unsigned int *xdp_xmit)
379440
{
380441
u32 pktlen, headroom, act, metalen;
381442
void *orig_data, *orig_data_end;
@@ -447,6 +508,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
447508
switch (act) {
448509
case XDP_PASS:
449510
break;
511+
case XDP_TX:
512+
get_page(virt_to_page(xdp.data));
513+
consume_skb(skb);
514+
xdp.rxq->mem = priv->xdp_mem;
515+
if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
516+
trace_xdp_exception(priv->dev, xdp_prog, act);
517+
goto err_xdp;
518+
}
519+
*xdp_xmit |= VETH_XDP_TX;
520+
rcu_read_unlock();
521+
goto xdp_xmit;
522+
case XDP_REDIRECT:
523+
get_page(virt_to_page(xdp.data));
524+
consume_skb(skb);
525+
xdp.rxq->mem = priv->xdp_mem;
526+
if (xdp_do_redirect(priv->dev, &xdp, xdp_prog))
527+
goto err_xdp;
528+
*xdp_xmit |= VETH_XDP_REDIR;
529+
rcu_read_unlock();
530+
goto xdp_xmit;
450531
default:
451532
bpf_warn_invalid_xdp_action(act);
452533
case XDP_ABORTED:
@@ -477,9 +558,15 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
477558
rcu_read_unlock();
478559
kfree_skb(skb);
479560
return NULL;
561+
err_xdp:
562+
rcu_read_unlock();
563+
page_frag_free(xdp.data);
564+
xdp_xmit:
565+
return NULL;
480566
}
481567

482-
static int veth_xdp_rcv(struct veth_priv *priv, int budget)
568+
static int veth_xdp_rcv(struct veth_priv *priv, int budget,
569+
unsigned int *xdp_xmit)
483570
{
484571
int i, done = 0;
485572

@@ -490,10 +577,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget)
490577
if (!ptr)
491578
break;
492579

493-
if (veth_is_xdp_frame(ptr))
494-
skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr));
495-
else
496-
skb = veth_xdp_rcv_skb(priv, ptr);
580+
if (veth_is_xdp_frame(ptr)) {
581+
skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr),
582+
xdp_xmit);
583+
} else {
584+
skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit);
585+
}
497586

498587
if (skb)
499588
napi_gro_receive(&priv->xdp_napi, skb);
@@ -508,9 +597,11 @@ static int veth_poll(struct napi_struct *napi, int budget)
508597
{
509598
struct veth_priv *priv =
510599
container_of(napi, struct veth_priv, xdp_napi);
600+
unsigned int xdp_xmit = 0;
511601
int done;
512602

513-
done = veth_xdp_rcv(priv, budget);
603+
xdp_set_return_frame_no_direct();
604+
done = veth_xdp_rcv(priv, budget, &xdp_xmit);
514605

515606
if (done < budget && napi_complete_done(napi, done)) {
516607
/* Write rx_notify_masked before reading ptr_ring */
@@ -521,6 +612,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
521612
}
522613
}
523614

615+
if (xdp_xmit & VETH_XDP_TX)
616+
veth_xdp_flush(priv->dev);
617+
if (xdp_xmit & VETH_XDP_REDIR)
618+
xdp_do_flush_map();
619+
xdp_clear_return_frame_no_direct();
620+
524621
return done;
525622
}
526623

@@ -567,6 +664,9 @@ static int veth_enable_xdp(struct net_device *dev)
567664
err = veth_napi_add(dev);
568665
if (err)
569666
goto err;
667+
668+
/* Save original mem info as it can be overwritten */
669+
priv->xdp_mem = priv->xdp_rxq.mem;
570670
}
571671

572672
rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog);
@@ -584,6 +684,7 @@ static void veth_disable_xdp(struct net_device *dev)
584684

585685
rcu_assign_pointer(priv->xdp_prog, NULL);
586686
veth_napi_del(dev);
687+
priv->xdp_rxq.mem = priv->xdp_mem;
587688
xdp_rxq_info_unreg(&priv->xdp_rxq);
588689
}
589690

0 commit comments

Comments
 (0)