Skip to content

Commit 10f6786

Browse files
author
Alexei Starovoitov
committed
Merge branch 'xdp_xmit-bulking'
Jesper Dangaard Brouer says: ==================== This patchset changes the ndo_xdp_xmit API to take a bulk of xdp frames. When the kernel is compiled with CONFIG_RETPOLINE, every indirect function pointer (branch) call hurts performance. For XDP this has a huge negative performance impact. This patchset reduces the needed (indirect) calls to ndo_xdp_xmit, but also prepares for further optimizations. The DMA API's use of indirect function pointer calls is the primary source of the regression. It is left for a followup patchset to use bulking calls towards the DMA API (via the scatter-gather calls). The other advantage of this API change is that drivers can more easily amortize the cost of any sync/locking scheme over the bulk of packets. The assumption of the current API is that the driver implementing the NDO will also allocate a dedicated XDP TX queue for every CPU in the system, which is not always possible or practical to configure. E.g. ixgbe cannot load an XDP program on a machine with more than 96 CPUs, due to limited hardware TX queues. E.g. virtio_net is hard to configure as it requires manually increasing the queues. E.g. tun driver chooses to use a per XDP frame producer lock modulo smp_processor_id over avail queues. I've considered adding 'flags' to ndo_xdp_xmit, but it's not part of this patchset. This will be a followup patchset, once we know if this will be needed (e.g. for non-map xdp_redirect flush-flag, and if AF_XDP chooses to use ndo_xdp_xmit for TX). --- V5: Fixed up issues spotted by Daniel and John V4: Split out the patches from 4 to 8 patches. I cannot split the driver changes from the NDO change, but I've tried to isolate the NDO change together with the driver change as much as possible. ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents f80acbd + a570e48 commit 10f6786

File tree

16 files changed

+448
-86
lines changed

16 files changed

+448
-86
lines changed

drivers/net/ethernet/intel/i40e/i40e_txrx.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3664,26 +3664,38 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
36643664
* @dev: netdev
36653665
* @xdp: XDP buffer
36663666
*
3667-
* Returns Zero if sent, else an error code
3667+
* Returns number of frames successfully sent. Frames that fail are
3668+
* free'ed via XDP return API.
3669+
*
3670+
* For error cases, a negative errno code is returned and no-frames
3671+
* are transmitted (caller must handle freeing frames).
36683672
**/
3669-
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
3673+
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
36703674
{
36713675
struct i40e_netdev_priv *np = netdev_priv(dev);
36723676
unsigned int queue_index = smp_processor_id();
36733677
struct i40e_vsi *vsi = np->vsi;
3674-
int err;
3678+
int drops = 0;
3679+
int i;
36753680

36763681
if (test_bit(__I40E_VSI_DOWN, vsi->state))
36773682
return -ENETDOWN;
36783683

36793684
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
36803685
return -ENXIO;
36813686

3682-
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
3683-
if (err != I40E_XDP_TX)
3684-
return -ENOSPC;
3687+
for (i = 0; i < n; i++) {
3688+
struct xdp_frame *xdpf = frames[i];
3689+
int err;
36853690

3686-
return 0;
3691+
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
3692+
if (err != I40E_XDP_TX) {
3693+
xdp_return_frame_rx_napi(xdpf);
3694+
drops++;
3695+
}
3696+
}
3697+
3698+
return n - drops;
36873699
}
36883700

36893701
/**

drivers/net/ethernet/intel/i40e/i40e_txrx.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
487487
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
488488
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
489489
bool __i40e_chk_linearize(struct sk_buff *skb);
490-
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
490+
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
491491
void i40e_xdp_flush(struct net_device *dev);
492492

493493
/**

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10017,11 +10017,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1001710017
}
1001810018
}
1001910019

10020-
static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
10020+
static int ixgbe_xdp_xmit(struct net_device *dev, int n,
10021+
struct xdp_frame **frames)
1002110022
{
1002210023
struct ixgbe_adapter *adapter = netdev_priv(dev);
1002310024
struct ixgbe_ring *ring;
10024-
int err;
10025+
int drops = 0;
10026+
int i;
1002510027

1002610028
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
1002710029
return -ENETDOWN;
@@ -10033,11 +10035,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
1003310035
if (unlikely(!ring))
1003410036
return -ENXIO;
1003510037

10036-
err = ixgbe_xmit_xdp_ring(adapter, xdpf);
10037-
if (err != IXGBE_XDP_TX)
10038-
return -ENOSPC;
10038+
for (i = 0; i < n; i++) {
10039+
struct xdp_frame *xdpf = frames[i];
10040+
int err;
1003910041

10040-
return 0;
10042+
err = ixgbe_xmit_xdp_ring(adapter, xdpf);
10043+
if (err != IXGBE_XDP_TX) {
10044+
xdp_return_frame_rx_napi(xdpf);
10045+
drops++;
10046+
}
10047+
}
10048+
10049+
return n - drops;
1004110050
}
1004210051

1004310052
static void ixgbe_xdp_flush(struct net_device *dev)

drivers/net/tun.c

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include <net/netns/generic.h>
7171
#include <net/rtnetlink.h>
7272
#include <net/sock.h>
73+
#include <net/xdp.h>
7374
#include <linux/seq_file.h>
7475
#include <linux/uio.h>
7576
#include <linux/skb_array.h>
@@ -1290,34 +1291,44 @@ static const struct net_device_ops tun_netdev_ops = {
12901291
.ndo_get_stats64 = tun_net_get_stats64,
12911292
};
12921293

1293-
static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
1294+
static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
12941295
{
12951296
struct tun_struct *tun = netdev_priv(dev);
12961297
struct tun_file *tfile;
12971298
u32 numqueues;
1298-
int ret = 0;
1299+
int drops = 0;
1300+
int cnt = n;
1301+
int i;
12991302

13001303
rcu_read_lock();
13011304

13021305
numqueues = READ_ONCE(tun->numqueues);
13031306
if (!numqueues) {
1304-
ret = -ENOSPC;
1305-
goto out;
1307+
rcu_read_unlock();
1308+
return -ENXIO; /* Caller will free/return all frames */
13061309
}
13071310

13081311
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
13091312
numqueues]);
1310-
/* Encode the XDP flag into lowest bit for consumer to differ
1311-
* XDP buffer from sk_buff.
1312-
*/
1313-
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
1314-
this_cpu_inc(tun->pcpu_stats->tx_dropped);
1315-
ret = -ENOSPC;
1313+
1314+
spin_lock(&tfile->tx_ring.producer_lock);
1315+
for (i = 0; i < n; i++) {
1316+
struct xdp_frame *xdp = frames[i];
1317+
/* Encode the XDP flag into lowest bit for consumer to differ
1318+
* XDP buffer from sk_buff.
1319+
*/
1320+
void *frame = tun_xdp_to_ptr(xdp);
1321+
1322+
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
1323+
this_cpu_inc(tun->pcpu_stats->tx_dropped);
1324+
xdp_return_frame_rx_napi(xdp);
1325+
drops++;
1326+
}
13161327
}
1328+
spin_unlock(&tfile->tx_ring.producer_lock);
13171329

1318-
out:
13191330
rcu_read_unlock();
1320-
return ret;
1331+
return cnt - drops;
13211332
}
13221333

13231334
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
@@ -1327,7 +1338,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
13271338
if (unlikely(!frame))
13281339
return -EOVERFLOW;
13291340

1330-
return tun_xdp_xmit(dev, frame);
1341+
return tun_xdp_xmit(dev, 1, &frame);
13311342
}
13321343

13331344
static void tun_xdp_flush(struct net_device *dev)

drivers/net/virtio_net.c

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
419419
virtqueue_kick(sq->vq);
420420
}
421421

422-
static int __virtnet_xdp_xmit(struct virtnet_info *vi,
423-
struct xdp_frame *xdpf)
422+
static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
423+
struct send_queue *sq,
424+
struct xdp_frame *xdpf)
424425
{
425426
struct virtio_net_hdr_mrg_rxbuf *hdr;
426-
struct xdp_frame *xdpf_sent;
427-
struct send_queue *sq;
428-
unsigned int len;
429-
unsigned int qp;
430427
int err;
431428

432-
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
433-
sq = &vi->sq[qp];
434-
435-
/* Free up any pending old buffers before queueing new ones. */
436-
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
437-
xdp_return_frame(xdpf_sent);
438-
439429
/* virtqueue want to use data area in-front of packet */
440430
if (unlikely(xdpf->metasize > 0))
441431
return -EOPNOTSUPP;
@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
459449
return 0;
460450
}
461451

462-
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
452+
static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
453+
struct xdp_frame *xdpf)
454+
{
455+
struct xdp_frame *xdpf_sent;
456+
struct send_queue *sq;
457+
unsigned int len;
458+
unsigned int qp;
459+
460+
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
461+
sq = &vi->sq[qp];
462+
463+
/* Free up any pending old buffers before queueing new ones. */
464+
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
465+
xdp_return_frame(xdpf_sent);
466+
467+
return __virtnet_xdp_xmit_one(vi, sq, xdpf);
468+
}
469+
470+
static int virtnet_xdp_xmit(struct net_device *dev,
471+
int n, struct xdp_frame **frames)
463472
{
464473
struct virtnet_info *vi = netdev_priv(dev);
465474
struct receive_queue *rq = vi->rq;
475+
struct xdp_frame *xdpf_sent;
466476
struct bpf_prog *xdp_prog;
477+
struct send_queue *sq;
478+
unsigned int len;
479+
unsigned int qp;
480+
int drops = 0;
481+
int err;
482+
int i;
483+
484+
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
485+
sq = &vi->sq[qp];
467486

468487
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
469488
* indicate XDP resources have been successfully allocated.
@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
472491
if (!xdp_prog)
473492
return -ENXIO;
474493

475-
return __virtnet_xdp_xmit(vi, xdpf);
494+
/* Free up any pending old buffers before queueing new ones. */
495+
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
496+
xdp_return_frame(xdpf_sent);
497+
498+
for (i = 0; i < n; i++) {
499+
struct xdp_frame *xdpf = frames[i];
500+
501+
err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
502+
if (err) {
503+
xdp_return_frame_rx_napi(xdpf);
504+
drops++;
505+
}
506+
}
507+
return n - drops;
476508
}
477509

478510
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
616648
xdpf = convert_to_xdp_frame(&xdp);
617649
if (unlikely(!xdpf))
618650
goto err_xdp;
619-
err = __virtnet_xdp_xmit(vi, xdpf);
651+
err = __virtnet_xdp_tx_xmit(vi, xdpf);
620652
if (unlikely(err)) {
621653
trace_xdp_exception(vi->dev, xdp_prog, act);
622654
goto err_xdp;
@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
779811
xdpf = convert_to_xdp_frame(&xdp);
780812
if (unlikely(!xdpf))
781813
goto err_xdp;
782-
err = __virtnet_xdp_xmit(vi, xdpf);
814+
err = __virtnet_xdp_tx_xmit(vi, xdpf);
783815
if (unlikely(err)) {
784816
trace_xdp_exception(vi->dev, xdp_prog, act);
785817
if (unlikely(xdp_page != page))

include/linux/bpf.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -487,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
487487
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
488488

489489
/* Map specifics */
490-
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
490+
struct xdp_buff;
491+
492+
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
491493
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
492494
void __dev_map_flush(struct bpf_map *map);
495+
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
496+
struct net_device *dev_rx);
493497

494498
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
495499
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
496500
void __cpu_map_flush(struct bpf_map *map);
497-
struct xdp_buff;
498501
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
499502
struct net_device *dev_rx);
500503

@@ -573,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
573576
{
574577
}
575578

579+
struct xdp_buff;
580+
struct bpf_dtab_netdev;
581+
582+
static inline
583+
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
584+
struct net_device *dev_rx)
585+
{
586+
return 0;
587+
}
588+
576589
static inline
577590
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
578591
{
@@ -587,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
587600
{
588601
}
589602

590-
struct xdp_buff;
591603
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
592604
struct xdp_buff *xdp,
593605
struct net_device *dev_rx)

include/linux/netdevice.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,9 +1185,13 @@ struct dev_ifalias {
11851185
* This function is used to set or query state related to XDP on the
11861186
* netdevice and manage BPF offload. See definition of
11871187
* enum bpf_netdev_command for details.
1188-
* int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
1189-
* This function is used to submit a XDP packet for transmit on a
1190-
* netdevice.
1188+
* int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
1189+
* This function is used to submit @n XDP packets for transmit on a
1190+
* netdevice. Returns number of frames successfully transmitted, frames
1191+
* that got dropped are freed/returned via xdp_return_frame().
1192+
* A negative return value means a general error invoking the ndo, meaning
1193+
* no frames were xmit'ed and core-caller will free all frames.
1194+
* TODO: Consider adding a flag to allow sending a flush operation.
11911195
* void (*ndo_xdp_flush)(struct net_device *dev);
11921196
* This function is used to inform the driver to flush a particular
11931197
* xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1375,8 +1379,8 @@ struct net_device_ops {
13751379
int needed_headroom);
13761380
int (*ndo_bpf)(struct net_device *dev,
13771381
struct netdev_bpf *bpf);
1378-
int (*ndo_xdp_xmit)(struct net_device *dev,
1379-
struct xdp_frame *xdp);
1382+
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
1383+
struct xdp_frame **xdp);
13801384
void (*ndo_xdp_flush)(struct net_device *dev);
13811385
};
13821386

include/net/page_pool.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
115115
void __page_pool_put_page(struct page_pool *pool,
116116
struct page *page, bool allow_direct);
117117

118-
static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
118+
static inline void page_pool_put_page(struct page_pool *pool,
119+
struct page *page, bool allow_direct)
119120
{
120121
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
121122
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
122123
*/
123124
#ifdef CONFIG_PAGE_POOL
124-
__page_pool_put_page(pool, page, false);
125+
__page_pool_put_page(pool, page, allow_direct);
125126
#endif
126127
}
127128
/* Very limited use-cases allow recycle direct */

include/net/xdp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
104104
}
105105

106106
void xdp_return_frame(struct xdp_frame *xdpf);
107+
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
107108
void xdp_return_buff(struct xdp_buff *xdp);
108109

109110
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,

0 commit comments

Comments
 (0)