Skip to content

Commit 9647c57

Browse files
magnus-karlsson authored and borkmann committed
xsk: i40e: ice: ixgbe: mlx5: Test for dma_need_sync earlier for better performance
Test for dma_need_sync earlier to increase performance. xsk_buff_dma_sync_for_cpu() takes an xdp_buff as parameter and from that the xsk_buff_pool reference is dug out. Perf shows that this dereference causes a lot of cache misses. But as the buffer pool is now sent down to the driver at zero-copy initialization time, we might as well use this pointer directly, instead of going via the xsk_buff and we can do so already in xsk_buff_dma_sync_for_cpu() instead of in xp_dma_sync_for_cpu. This gets rid of these cache misses. Throughput increases with 3% for the xdpsock l2fwd sample application on my machine. Signed-off-by: Magnus Karlsson <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]> Acked-by: Björn Töpel <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 8ef4e27 commit 9647c57

File tree

6 files changed

+10
-10
lines changed

6 files changed

+10
-10
lines changed

drivers/net/ethernet/intel/i40e/i40e_xsk.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -314,7 +314,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
314314

315315
bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
316316
(*bi)->data_end = (*bi)->data + size;
317-
xsk_buff_dma_sync_for_cpu(*bi);
317+
xsk_buff_dma_sync_for_cpu(*bi, rx_ring->xsk_pool);
318318

319319
xdp_res = i40e_run_xdp_zc(rx_ring, *bi);
320320
if (xdp_res) {

drivers/net/ethernet/intel/ice/ice_xsk.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -595,7 +595,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
595595

596596
rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
597597
rx_buf->xdp->data_end = rx_buf->xdp->data + size;
598-
xsk_buff_dma_sync_for_cpu(rx_buf->xdp);
598+
xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool);
599599

600600
xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
601601
if (xdp_res) {

drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
287287
}
288288

289289
bi->xdp->data_end = bi->xdp->data + size;
290-
xsk_buff_dma_sync_for_cpu(bi->xdp);
290+
xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
291291
xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);
292292

293293
if (xdp_res) {

drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
4848

4949
xdp->data_end = xdp->data + cqe_bcnt32;
5050
xdp_set_data_meta_invalid(xdp);
51-
xsk_buff_dma_sync_for_cpu(xdp);
51+
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
5252
prefetch(xdp->data);
5353

5454
rcu_read_lock();
@@ -99,7 +99,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
9999

100100
xdp->data_end = xdp->data + cqe_bcnt;
101101
xdp_set_data_meta_invalid(xdp);
102-
xsk_buff_dma_sync_for_cpu(xdp);
102+
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
103103
prefetch(xdp->data);
104104

105105
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {

include/net/xdp_sock_drv.h

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -99,10 +99,13 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
9999
return xp_raw_get_data(pool, addr);
100100
}
101101

102-
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
102+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
103103
{
104104
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
105105

106+
if (!pool->dma_need_sync)
107+
return;
108+
106109
xp_dma_sync_for_cpu(xskb);
107110
}
108111

@@ -222,7 +225,7 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
222225
return NULL;
223226
}
224227

225-
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
228+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
226229
{
227230
}
228231

include/net/xsk_buff_pool.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -114,9 +114,6 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
114114
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
115115
static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
116116
{
117-
if (!xskb->pool->dma_need_sync)
118-
return;
119-
120117
xp_dma_sync_for_cpu_slow(xskb);
121118
}
122119

0 commit comments

Comments (0)