Skip to content

Commit f601899

Browse files
Wei Fang authored and davem330 committed
net: fec: add XDP_TX feature support
The XDP_TX feature is not supported before, and all the frames which are deemed to do XDP_TX action actually do the XDP_DROP action. So this patch adds the XDP_TX support to FEC driver. I tested the performance of XDP_TX in XDP_DRV mode and XDP_SKB mode respectively on i.MX8MP-EVK platform, and as suggested by Jesper, I also tested the performance of XDP_REDIRECT on the same platform. And the test steps and results are as follows. XDP_TX test: Step 1: One board is used as generator and connects to switch,and the FEC port of DUT also connects to the switch. Both boards with flow control off. Then the generator runs the pktgen_sample03_burst_single_flow.sh script to generate and send burst traffic to DUT. Note that the size of packet was set to 64 bytes and the procotol of packet was UDP in my test scenario. In addition, the SMAC of the packet need to be different from the MAC of the generator, because the xdp2 program will swap the DMAC and SMAC of the packet and send it back to the generator. If the SMAC of the generated packet is the MAC of the generator, the generator will receive the returned traffic which increase the CPU loading and significantly degrade the transmit speed of the generator, and finally it affects the test of XDP_TX performance. Step 2: The DUT runs the xdp2 program to transmit received UDP packets back out on the same port where they were received. root@imx8mpevk:~# ./xdp2 eth0 proto 17: 353918 pkt/s proto 17: 352923 pkt/s proto 17: 353900 pkt/s proto 17: 352672 pkt/s proto 17: 353912 pkt/s proto 17: 354219 pkt/s root@imx8mpevk:~# ./xdp2 -S eth0 proto 17: 160604 pkt/s proto 17: 160708 pkt/s proto 17: 160564 pkt/s proto 17: 160684 pkt/s proto 17: 160640 pkt/s proto 17: 160720 pkt/s The above results show that the XDP_TX performance of XDP_DRV mode is much better than XDP_SKB mode, more than twice that of XDP_SKB mode, which is in line with our expectation. 
XDP_REDIRECT test: Step 1: Both the generator and the FEC port of the DUT connect to the switch port. All the ports have flow control off, then the generator runs the pktgen script to generate and send burst traffic to the DUT. Note that the size of the packet was set to 64 bytes and the protocol of the packet was UDP in my test scenario. Step 2: The DUT runs the xdp_redirect program to redirect the traffic from the FEC port to the FEC port itself. root@imx8mpevk:~# ./xdp_redirect eth0 eth0 Redirecting from eth0 (ifindex 2; driver fec) to eth0 (ifindex 2; driver fec) Summary 232,302 rx/s 0 err,drop/s 232,344 xmit/s Summary 234,579 rx/s 0 err,drop/s 234,577 xmit/s Summary 235,548 rx/s 0 err,drop/s 235,549 xmit/s Summary 234,704 rx/s 0 err,drop/s 234,703 xmit/s Summary 235,504 rx/s 0 err,drop/s 235,504 xmit/s Summary 235,223 rx/s 0 err,drop/s 235,224 xmit/s Summary 234,509 rx/s 0 err,drop/s 234,507 xmit/s Summary 235,481 rx/s 0 err,drop/s 235,482 xmit/s Summary 234,684 rx/s 0 err,drop/s 234,683 xmit/s Summary 235,520 rx/s 0 err,drop/s 235,520 xmit/s Summary 235,461 rx/s 0 err,drop/s 235,461 xmit/s Summary 234,627 rx/s 0 err,drop/s 234,627 xmit/s Summary 235,611 rx/s 0 err,drop/s 235,611 xmit/s Packets received : 3,053,753 Average packets/s : 234,904 Packets transmitted : 3,053,792 Average transmit/s : 234,907 Comparing the performance of XDP_TX with XDP_REDIRECT, XDP_TX is also much better than XDP_REDIRECT. This is also in line with our expectation. Signed-off-by: Wei Fang <[email protected]> Suggested-by: Jesper Dangaard Brouer <[email protected]> Suggested-by: Jakub Kicinski <[email protected]> Reviewed-by: Larysa Zaremba <[email protected]> Acked-by: Jesper Dangaard Brouer <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent e56e220 commit f601899

File tree

2 files changed

+87
-21
lines changed

2 files changed

+87
-21
lines changed

drivers/net/ethernet/freescale/fec.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ enum {
548548
enum fec_txbuf_type {
549549
FEC_TXBUF_T_SKB,
550550
FEC_TXBUF_T_XDP_NDO,
551+
FEC_TXBUF_T_XDP_TX,
551552
};
552553

553554
struct fec_tx_buffer {

drivers/net/ethernet/freescale/fec_main.c

Lines changed: 86 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,17 @@
6969
#include <soc/imx/cpuidle.h>
7070
#include <linux/filter.h>
7171
#include <linux/bpf.h>
72+
#include <linux/bpf_trace.h>
7273

7374
#include <asm/cacheflush.h>
7475

7576
#include "fec.h"
7677

7778
static void set_multicast_list(struct net_device *ndev);
7879
static void fec_enet_itr_coal_set(struct net_device *ndev);
80+
static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
81+
int cpu, struct xdp_buff *xdp,
82+
u32 dma_sync_len);
7983

8084
#define DRIVER_NAME "fec"
8185

@@ -961,7 +965,8 @@ static void fec_enet_bd_init(struct net_device *dev)
961965
txq->tx_buf[i].skb = NULL;
962966
}
963967
} else {
964-
if (bdp->cbd_bufaddr)
968+
if (bdp->cbd_bufaddr &&
969+
txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO)
965970
dma_unmap_single(&fep->pdev->dev,
966971
fec32_to_cpu(bdp->cbd_bufaddr),
967972
fec16_to_cpu(bdp->cbd_datlen),
@@ -1424,13 +1429,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
14241429
break;
14251430

14261431
xdpf = txq->tx_buf[index].xdp;
1427-
if (bdp->cbd_bufaddr)
1432+
if (bdp->cbd_bufaddr &&
1433+
txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO)
14281434
dma_unmap_single(&fep->pdev->dev,
14291435
fec32_to_cpu(bdp->cbd_bufaddr),
14301436
fec16_to_cpu(bdp->cbd_datlen),
14311437
DMA_TO_DEVICE);
14321438
bdp->cbd_bufaddr = cpu_to_fec32(0);
1433-
if (!xdpf) {
1439+
if (unlikely(!xdpf)) {
14341440
txq->tx_buf[index].type = FEC_TXBUF_T_SKB;
14351441
goto tx_buf_done;
14361442
}
@@ -1483,7 +1489,16 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
14831489
/* Free the sk buffer associated with this last transmit */
14841490
dev_kfree_skb_any(skb);
14851491
} else {
1486-
xdp_return_frame(xdpf);
1492+
if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO) {
1493+
xdp_return_frame_rx_napi(xdpf);
1494+
} else { /* recycle pages of XDP_TX frames */
1495+
struct page *page = virt_to_head_page(xdpf->data);
1496+
1497+
/* The dma_sync_size = 0 as XDP_TX has already
1498+
* synced DMA for_device.
1499+
*/
1500+
page_pool_put_page(page->pp, page, 0, true);
1501+
}
14871502

14881503
txq->tx_buf[index].xdp = NULL;
14891504
/* restore default tx buffer type: FEC_TXBUF_T_SKB */
@@ -1542,7 +1557,7 @@ static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
15421557

15431558
static u32
15441559
fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
1545-
struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int index)
1560+
struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu)
15461561
{
15471562
unsigned int sync, len = xdp->data_end - xdp->data;
15481563
u32 ret = FEC_ENET_XDP_PASS;
@@ -1552,8 +1567,10 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
15521567

15531568
act = bpf_prog_run_xdp(prog, xdp);
15541569

1555-
/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
1556-
sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM;
1570+
/* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover
1571+
* max len CPU touch
1572+
*/
1573+
sync = xdp->data_end - xdp->data;
15571574
sync = max(sync, len);
15581575

15591576
switch (act) {
@@ -1574,11 +1591,19 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
15741591
}
15751592
break;
15761593

1577-
default:
1578-
bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
1579-
fallthrough;
1580-
15811594
case XDP_TX:
1595+
err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync);
1596+
if (unlikely(err)) {
1597+
ret = FEC_ENET_XDP_CONSUMED;
1598+
page = virt_to_head_page(xdp->data);
1599+
page_pool_put_page(rxq->page_pool, page, sync, true);
1600+
trace_xdp_exception(fep->netdev, prog, act);
1601+
} else {
1602+
ret = FEC_ENET_XDP_TX;
1603+
}
1604+
break;
1605+
1606+
default:
15821607
bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
15831608
fallthrough;
15841609

@@ -1620,6 +1645,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
16201645
struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
16211646
u32 ret, xdp_result = FEC_ENET_XDP_PASS;
16221647
u32 data_start = FEC_ENET_XDP_HEADROOM;
1648+
int cpu = smp_processor_id();
16231649
struct xdp_buff xdp;
16241650
struct page *page;
16251651
u32 sub_len = 4;
@@ -1698,7 +1724,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
16981724
/* subtract 16bit shift and FCS */
16991725
xdp_prepare_buff(&xdp, page_address(page),
17001726
data_start, pkt_len - sub_len, false);
1701-
ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, index);
1727+
ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu);
17021728
xdp_result |= ret;
17031729
if (ret != FEC_ENET_XDP_PASS)
17041730
goto rx_processing_done;
@@ -3767,7 +3793,8 @@ fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index)
37673793

37683794
static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
37693795
struct fec_enet_priv_tx_q *txq,
3770-
struct xdp_frame *frame)
3796+
struct xdp_frame *frame,
3797+
u32 dma_sync_len, bool ndo_xmit)
37713798
{
37723799
unsigned int index, status, estatus;
37733800
struct bufdesc *bdp;
@@ -3787,10 +3814,24 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
37873814

37883815
index = fec_enet_get_bd_index(bdp, &txq->bd);
37893816

3790-
dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
3791-
frame->len, DMA_TO_DEVICE);
3792-
if (dma_mapping_error(&fep->pdev->dev, dma_addr))
3793-
return -ENOMEM;
3817+
if (ndo_xmit) {
3818+
dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
3819+
frame->len, DMA_TO_DEVICE);
3820+
if (dma_mapping_error(&fep->pdev->dev, dma_addr))
3821+
return -ENOMEM;
3822+
3823+
txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
3824+
} else {
3825+
struct page *page = virt_to_page(frame->data);
3826+
3827+
dma_addr = page_pool_get_dma_addr(page) + sizeof(*frame) +
3828+
frame->headroom;
3829+
dma_sync_single_for_device(&fep->pdev->dev, dma_addr,
3830+
dma_sync_len, DMA_BIDIRECTIONAL);
3831+
txq->tx_buf[index].type = FEC_TXBUF_T_XDP_TX;
3832+
}
3833+
3834+
txq->tx_buf[index].xdp = frame;
37943835

37953836
status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
37963837
if (fep->bufdesc_ex)
@@ -3809,9 +3850,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
38093850
ebdp->cbd_esc = cpu_to_fec32(estatus);
38103851
}
38113852

3812-
txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
3813-
txq->tx_buf[index].xdp = frame;
3814-
38153853
/* Make sure the updates to rest of the descriptor are performed before
38163854
* transferring ownership.
38173855
*/
@@ -3837,6 +3875,33 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
38373875
return 0;
38383876
}
38393877

3878+
static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep,
3879+
int cpu, struct xdp_buff *xdp,
3880+
u32 dma_sync_len)
3881+
{
3882+
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
3883+
struct fec_enet_priv_tx_q *txq;
3884+
struct netdev_queue *nq;
3885+
int queue, ret;
3886+
3887+
if (unlikely(!xdpf))
3888+
return -EFAULT;
3889+
3890+
queue = fec_enet_xdp_get_tx_queue(fep, cpu);
3891+
txq = fep->tx_queue[queue];
3892+
nq = netdev_get_tx_queue(fep->netdev, queue);
3893+
3894+
__netif_tx_lock(nq, cpu);
3895+
3896+
/* Avoid tx timeout as XDP shares the queue with kernel stack */
3897+
txq_trans_cond_update(nq);
3898+
ret = fec_enet_txq_xmit_frame(fep, txq, xdpf, dma_sync_len, false);
3899+
3900+
__netif_tx_unlock(nq);
3901+
3902+
return ret;
3903+
}
3904+
38403905
static int fec_enet_xdp_xmit(struct net_device *dev,
38413906
int num_frames,
38423907
struct xdp_frame **frames,
@@ -3859,7 +3924,7 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
38593924
/* Avoid tx timeout as XDP shares the queue with kernel stack */
38603925
txq_trans_cond_update(nq);
38613926
for (i = 0; i < num_frames; i++) {
3862-
if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
3927+
if (fec_enet_txq_xmit_frame(fep, txq, frames[i], 0, true) < 0)
38633928
break;
38643929
sent_frames++;
38653930
}

0 commit comments

Comments
 (0)