Skip to content

Commit 35b510e

Browse files
Saeed Mahameed authored and davem330 committed
net/mlx5e: XDP TX xmit more
Previously we rang XDP SQ doorbell on every forwarded XDP packet.

Here we introduce a xmit more like mechanism that will queue up more than
one packet into SQ (up to RX napi budget) w/o notifying the hardware.

Once RX napi budget is consumed and we exit napi RX loop, we will
flush (doorbell) all XDP looped packets in case there are such.

XDP forward packet rate:

Comparing XDP with and w/o xmit more (bulk transmit):

RX Cores    XDP TX       XDP TX (xmit more)
---------------------------------------------------
1           6.5Mpps      12.4Mpps
2          13.2Mpps      24.2Mpps
4          25.2Mpps      36.3Mpps*
8          36.3Mpps*     36.3Mpps*

*My xmitter was limited to 36.3Mpps, so it is the bottleneck.
It seems that receive side can handle more.

Signed-off-by: Saeed Mahameed <[email protected]>
Signed-off-by: Tariq Toukan <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent b5503b9 commit 35b510e

File tree

2 files changed

+25
-8
lines changed

2 files changed

+25
-8
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ struct mlx5e_sq {
433433
struct {
434434
struct mlx5e_sq_wqe_info *wqe_info;
435435
struct mlx5e_dma_info *di;
436+
bool doorbell;
436437
} xdp;
437438
} db;
438439

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,18 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
632632
napi_gro_receive(rq->cq.napi, skb);
633633
}
634634

635+
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
636+
{
637+
struct mlx5_wq_cyc *wq = &sq->wq;
638+
struct mlx5e_tx_wqe *wqe;
639+
u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */
640+
641+
wqe = mlx5_wq_cyc_get_wqe(wq, pi);
642+
643+
wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
644+
mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
645+
}
646+
635647
static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
636648
struct mlx5e_dma_info *di,
637649
unsigned int data_offset,
@@ -652,6 +664,11 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
652664
void *data = page_address(di->page) + data_offset;
653665

654666
if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) {
667+
if (sq->db.xdp.doorbell) {
668+
/* SQ is full, ring doorbell */
669+
mlx5e_xmit_xdp_doorbell(sq);
670+
sq->db.xdp.doorbell = false;
671+
}
655672
rq->stats.xdp_tx_full++;
656673
mlx5e_page_release(rq, di, true);
657674
return;
@@ -681,14 +698,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
681698
wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS;
682699
sq->pc += MLX5E_XDP_TX_WQEBBS;
683700

684-
wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
685-
mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
686-
687-
/* fill sq edge with nops to avoid wqe wrap around */
688-
while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
689-
sq->db.xdp.wqe_info[pi].opcode = MLX5_OPCODE_NOP;
690-
mlx5e_send_nop(sq, false);
691-
}
701+
sq->db.xdp.doorbell = true;
692702
rq->stats.xdp_tx++;
693703
}
694704

@@ -863,6 +873,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
863873
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
864874
{
865875
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
876+
struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq;
866877
int work_done = 0;
867878

868879
if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
@@ -889,6 +900,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
889900
rq->handle_rx_cqe(rq, cqe);
890901
}
891902

903+
if (xdp_sq->db.xdp.doorbell) {
904+
mlx5e_xmit_xdp_doorbell(xdp_sq);
905+
xdp_sq->db.xdp.doorbell = false;
906+
}
907+
892908
mlx5_cqwq_update_db_record(&cq->wq);
893909

894910
/* ensure cq space is freed before enabling more cqes */

0 commit comments

Comments (0)