Commit 735fc40

netoptimizer authored and Alexei Starovoitov committed
xdp: change ndo_xdp_xmit API to support bulking
This patch changes the API for ndo_xdp_xmit to support bulking xdp_frames.

When the kernel is compiled with CONFIG_RETPOLINE, XDP sees a huge slowdown. Most of the slowdown is caused by DMA API indirect function calls, but also by the net_device->ndo_xdp_xmit() call.

Benchmarking the patch with CONFIG_RETPOLINE, using xdp_redirect_map with a single flow/core test (CPU E5-1650 v4 @ 3.60GHz), showed improved performance:

for driver ixgbe: 6,042,682 pps -> 6,853,768 pps = +811,086 pps
for driver i40e : 6,187,169 pps -> 6,724,519 pps = +537,350 pps

With frames available as a bulk inside the driver's ndo_xdp_xmit call, further optimizations are possible, like bulk DMA-mapping for TX.

Testing without CONFIG_RETPOLINE shows the same performance for physical NIC drivers. The virtual NIC driver tun sees a huge performance boost, as it can avoid per-frame producer locking and instead amortize the locking cost over the bulk.

V2: Fix compile errors reported by kbuild test robot <[email protected]>
V4: Isolated ndo, driver changes and callers.

Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 389ab7f commit 735fc40
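To summarize the new contract from the caller's side: a non-negative return is the number of frames the driver accepted (the driver has already freed the frames it dropped), while a negative errno means nothing was consumed and the caller must free every frame itself. Below is a minimal sketch of that calling pattern, not code from this commit; xmit_bulk() is a hypothetical helper, while ndo_xdp_xmit and xdp_return_frame_rx_napi() are the real kernel APIs the commit changes and uses.

#include <linux/netdevice.h>
#include <net/xdp.h>

/* Hedged sketch of how a caller drives the bulked ndo_xdp_xmit. */
static int xmit_bulk(struct net_device *dev, struct xdp_frame **frames, int count)
{
        int sent, i;

        sent = dev->netdev_ops->ndo_xdp_xmit(dev, count, frames);
        if (sent < 0) {
                /* General error: the driver consumed nothing, so the
                 * caller must return every frame itself.
                 */
                for (i = 0; i < count; i++)
                        xdp_return_frame_rx_napi(frames[i]);
                return sent;
        }

        /* 'sent' frames were queued; the driver already freed the
         * (count - sent) frames it dropped.
         */
        return sent;
}

This is the same split of responsibility that the devmap change (kernel/bpf/devmap.c below) implements in bq_xmit_all().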

File tree

8 files changed: +139 -64 lines changed


drivers/net/ethernet/intel/i40e/i40e_txrx.c

Lines changed: 19 additions & 7 deletions

@@ -3664,26 +3664,38 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * @dev: netdev
  * @xdp: XDP buffer
  *
- * Returns Zero if sent, else an error code
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ *
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
  **/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
        struct i40e_netdev_priv *np = netdev_priv(dev);
        unsigned int queue_index = smp_processor_id();
        struct i40e_vsi *vsi = np->vsi;
-       int err;
+       int drops = 0;
+       int i;

        if (test_bit(__I40E_VSI_DOWN, vsi->state))
                return -ENETDOWN;

        if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
                return -ENXIO;

-       err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
-       if (err != I40E_XDP_TX)
-               return -ENOSPC;
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+               int err;

-       return 0;
+               err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+               if (err != I40E_XDP_TX) {
+                       xdp_return_frame_rx_napi(xdpf);
+                       drops++;
+               }
+       }
+
+       return n - drops;
 }

 /**

drivers/net/ethernet/intel/i40e/i40e_txrx.h

Lines changed: 1 addition & 1 deletion

@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
 void i40e_xdp_flush(struct net_device *dev);

 /**

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

Lines changed: 15 additions & 6 deletions

@@ -10017,11 +10017,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
        }
 }

-static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int ixgbe_xdp_xmit(struct net_device *dev, int n,
+                          struct xdp_frame **frames)
 {
        struct ixgbe_adapter *adapter = netdev_priv(dev);
        struct ixgbe_ring *ring;
-       int err;
+       int drops = 0;
+       int i;

        if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
                return -ENETDOWN;
@@ -10033,11 +10035,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
        if (unlikely(!ring))
                return -ENXIO;

-       err = ixgbe_xmit_xdp_ring(adapter, xdpf);
-       if (err != IXGBE_XDP_TX)
-               return -ENOSPC;
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+               int err;

-       return 0;
+               err = ixgbe_xmit_xdp_ring(adapter, xdpf);
+               if (err != IXGBE_XDP_TX) {
+                       xdp_return_frame_rx_napi(xdpf);
+                       drops++;
+               }
+       }
+
+       return n - drops;
 }

 static void ixgbe_xdp_flush(struct net_device *dev)

drivers/net/tun.c

Lines changed: 24 additions & 13 deletions

@@ -70,6 +70,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/sock.h>
+#include <net/xdp.h>
 #include <linux/seq_file.h>
 #include <linux/uio.h>
 #include <linux/skb_array.h>
@@ -1290,34 +1291,44 @@ static const struct net_device_ops tun_netdev_ops = {
        .ndo_get_stats64        = tun_net_get_stats64,
 };

-static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
+static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
        struct tun_struct *tun = netdev_priv(dev);
        struct tun_file *tfile;
        u32 numqueues;
-       int ret = 0;
+       int drops = 0;
+       int cnt = n;
+       int i;

        rcu_read_lock();

        numqueues = READ_ONCE(tun->numqueues);
        if (!numqueues) {
-               ret = -ENOSPC;
-               goto out;
+               rcu_read_unlock();
+               return -ENXIO; /* Caller will free/return all frames */
        }

        tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
                                            numqueues]);
-       /* Encode the XDP flag into lowest bit for consumer to differ
-        * XDP buffer from sk_buff.
-        */
-       if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
-               this_cpu_inc(tun->pcpu_stats->tx_dropped);
-               ret = -ENOSPC;
+
+       spin_lock(&tfile->tx_ring.producer_lock);
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdp = frames[i];
+               /* Encode the XDP flag into lowest bit for consumer to differ
+                * XDP buffer from sk_buff.
+                */
+               void *frame = tun_xdp_to_ptr(xdp);
+
+               if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
+                       this_cpu_inc(tun->pcpu_stats->tx_dropped);
+                       xdp_return_frame_rx_napi(xdp);
+                       drops++;
+               }
        }
+       spin_unlock(&tfile->tx_ring.producer_lock);

-out:
        rcu_read_unlock();
-       return ret;
+       return cnt - drops;
 }

 static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
@@ -1327,7 +1338,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
        if (unlikely(!frame))
                return -EOVERFLOW;

-       return tun_xdp_xmit(dev, frame);
+       return tun_xdp_xmit(dev, 1, &frame);
 }

 static void tun_xdp_flush(struct net_device *dev)
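The tun hunk above is where bulking pays off most: instead of letting ptr_ring_produce() take and release the producer lock once per frame, the driver now grabs tfile->tx_ring.producer_lock once and uses the unlocked __ptr_ring_produce() for the whole bulk. A stripped-down sketch of just that locking pattern follows; produce_bulk() and its arguments are illustrative, not from this commit, and unlike tun it stops at the first full slot instead of dropping and continuing.

#include <linux/ptr_ring.h>
#include <linux/spinlock.h>

/* Hedged sketch: amortize the producer lock over a bulk of items.
 * ptr_ring_produce() locks internally per call; __ptr_ring_produce()
 * assumes the caller already holds the producer lock.
 */
static int produce_bulk(struct ptr_ring *ring, void **items, int n)
{
        int queued = 0;
        int i;

        spin_lock(&ring->producer_lock);        /* one lock/unlock per bulk */
        for (i = 0; i < n; i++) {
                if (__ptr_ring_produce(ring, items[i]))
                        break;                  /* ring full */
                queued++;
        }
        spin_unlock(&ring->producer_lock);

        return queued;
}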

drivers/net/virtio_net.c

Lines changed: 49 additions & 17 deletions

@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
        virtqueue_kick(sq->vq);
 }

-static int __virtnet_xdp_xmit(struct virtnet_info *vi,
-                             struct xdp_frame *xdpf)
+static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+                                 struct send_queue *sq,
+                                 struct xdp_frame *xdpf)
 {
        struct virtio_net_hdr_mrg_rxbuf *hdr;
-       struct xdp_frame *xdpf_sent;
-       struct send_queue *sq;
-       unsigned int len;
-       unsigned int qp;
        int err;

-       qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-       sq = &vi->sq[qp];
-
-       /* Free up any pending old buffers before queueing new ones. */
-       while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-               xdp_return_frame(xdpf_sent);
-
        /* virtqueue want to use data area in-front of packet */
        if (unlikely(xdpf->metasize > 0))
                return -EOPNOTSUPP;
@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
        return 0;
 }

-static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
+                                struct xdp_frame *xdpf)
+{
+       struct xdp_frame *xdpf_sent;
+       struct send_queue *sq;
+       unsigned int len;
+       unsigned int qp;
+
+       qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+       sq = &vi->sq[qp];
+
+       /* Free up any pending old buffers before queueing new ones. */
+       while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+               xdp_return_frame(xdpf_sent);
+
+       return __virtnet_xdp_xmit_one(vi, sq, xdpf);
+}
+
+static int virtnet_xdp_xmit(struct net_device *dev,
+                           int n, struct xdp_frame **frames)
 {
        struct virtnet_info *vi = netdev_priv(dev);
        struct receive_queue *rq = vi->rq;
+       struct xdp_frame *xdpf_sent;
        struct bpf_prog *xdp_prog;
+       struct send_queue *sq;
+       unsigned int len;
+       unsigned int qp;
+       int drops = 0;
+       int err;
+       int i;
+
+       qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+       sq = &vi->sq[qp];

        /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
         * indicate XDP resources have been successfully allocated.
@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
        if (!xdp_prog)
                return -ENXIO;

-       return __virtnet_xdp_xmit(vi, xdpf);
+       /* Free up any pending old buffers before queueing new ones. */
+       while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+               xdp_return_frame(xdpf_sent);
+
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+
+               err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
+               if (err) {
+                       xdp_return_frame_rx_napi(xdpf);
+                       drops++;
+               }
+       }
+       return n - drops;
 }

 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                xdpf = convert_to_xdp_frame(&xdp);
                if (unlikely(!xdpf))
                        goto err_xdp;
-               err = __virtnet_xdp_xmit(vi, xdpf);
+               err = __virtnet_xdp_tx_xmit(vi, xdpf);
                if (unlikely(err)) {
                        trace_xdp_exception(vi->dev, xdp_prog, act);
                        goto err_xdp;
@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                xdpf = convert_to_xdp_frame(&xdp);
                if (unlikely(!xdpf))
                        goto err_xdp;
-               err = __virtnet_xdp_xmit(vi, xdpf);
+               err = __virtnet_xdp_tx_xmit(vi, xdpf);
                if (unlikely(err)) {
                        trace_xdp_exception(vi->dev, xdp_prog, act);
                        if (unlikely(xdp_page != page))

include/linux/netdevice.h

Lines changed: 9 additions & 5 deletions

@@ -1185,9 +1185,13 @@ struct dev_ifalias {
  *     This function is used to set or query state related to XDP on the
  *     netdevice and manage BPF offload. See definition of
  *     enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- *     This function is used to submit a XDP packet for transmit on a
- *     netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ *     This function is used to submit @n XDP packets for transmit on a
+ *     netdevice. Returns number of frames successfully transmitted, frames
+ *     that got dropped are freed/returned via xdp_return_frame().
+ *     Returns negative number, means general error invoking ndo, meaning
+ *     no frames were xmit'ed and core-caller will free all frames.
+ *     TODO: Consider add flag to allow sending flush operation.
  * void (*ndo_xdp_flush)(struct net_device *dev);
  *     This function is used to inform the driver to flush a particular
  *     xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1375,8 +1379,8 @@ struct net_device_ops {
                                                       int needed_headroom);
        int                     (*ndo_bpf)(struct net_device *dev,
                                           struct netdev_bpf *bpf);
-       int                     (*ndo_xdp_xmit)(struct net_device *dev,
-                                               struct xdp_frame *xdp);
+       int                     (*ndo_xdp_xmit)(struct net_device *dev, int n,
+                                               struct xdp_frame **xdp);
        void                    (*ndo_xdp_flush)(struct net_device *dev);
 };

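The driver changes above all implement the new signature with the same shape: loop over the bulk, hand each frame to the hardware path, free failures via the XDP return API, and report the number of successes. As a distilled sketch only, not code from this commit; mydrv_xdp_xmit() and mydrv_xmit_one() are hypothetical names:

#include <linux/netdevice.h>
#include <net/xdp.h>

/* Hypothetical per-frame transmit helper; 0 on success. */
static int mydrv_xmit_one(struct net_device *dev, struct xdp_frame *xdpf)
{
        return 0;
}

/* Hedged sketch of the driver-side bulk pattern. */
static int mydrv_xdp_xmit(struct net_device *dev, int n,
                          struct xdp_frame **frames)
{
        int drops = 0;
        int i;

        if (unlikely(!netif_running(dev)))
                return -ENETDOWN;       /* errno: no frames consumed, caller frees */

        for (i = 0; i < n; i++) {
                struct xdp_frame *xdpf = frames[i];

                if (mydrv_xmit_one(dev, xdpf)) {
                        /* Driver owns failed frames and must return them */
                        xdp_return_frame_rx_napi(xdpf);
                        drops++;
                }
        }

        return n - drops;       /* frames successfully queued */
}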

kernel/bpf/devmap.c

Lines changed: 18 additions & 11 deletions

@@ -232,24 +232,31 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
                prefetch(xdpf);
        }

-       for (i = 0; i < bq->count; i++) {
-               struct xdp_frame *xdpf = bq->q[i];
-               int err;
-
-               err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
-               if (err) {
-                       drops++;
-                       xdp_return_frame_rx_napi(xdpf);
-               } else {
-                       sent++;
-               }
+       sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
+       if (sent < 0) {
+               sent = 0;
+               goto error;
        }
+       drops = bq->count - sent;
+out:
        bq->count = 0;

        trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
                              sent, drops, bq->dev_rx, dev);
        bq->dev_rx = NULL;
        return 0;
+error:
+       /* If ndo_xdp_xmit fails with an errno, no frames have been
+        * xmit'ed and it's our responsibility to them free all.
+        */
+       for (i = 0; i < bq->count; i++) {
+               struct xdp_frame *xdpf = bq->q[i];
+
+               /* RX path under NAPI protection, can return frames faster */
+               xdp_return_frame_rx_napi(xdpf);
+               drops++;
+       }
+       goto out;
 }

 /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled

net/core/filter.c

Lines changed: 4 additions & 4 deletions

@@ -3039,7 +3039,7 @@ static int __bpf_tx_xdp(struct net_device *dev,
                        u32 index)
 {
        struct xdp_frame *xdpf;
-       int err;
+       int sent;

        if (!dev->netdev_ops->ndo_xdp_xmit) {
                return -EOPNOTSUPP;
@@ -3049,9 +3049,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
        if (unlikely(!xdpf))
                return -EOVERFLOW;

-       err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
-       if (err)
-               return err;
+       sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
+       if (sent <= 0)
+               return sent;
        dev->netdev_ops->ndo_xdp_flush(dev);
        return 0;
 }
