
Commit 1c9ba9c

tirthendu-intel authored and Alexei Starovoitov committed
i40e: xsk: add RX multi-buffer support
This patch is inspired by the multi-buffer support in the non-zc path for i40e, as well as by the patch adding zc multi-buffer support to ice. Each subsequent frag is added to the skb_shared_info of the first frag for possible xdp_prog use, as well as to the xsk buffer list for accessing the buffers in af_xdp.

For XDP_PASS, new pages are allocated for the frags and their contents are copied from the memory backed by the xsk_buff_pool.

Replace next_to_clean with next_to_process, as done in the non-zc path, and advance it for every buffer; change the semantics of next_to_clean to point to the first buffer of a packet. The driver will use next_to_process in the same way next_to_clean was used previously.

For the non-multi-buffer case, next_to_process and next_to_clean will always be the same, since each packet consists of a single buffer.

Signed-off-by: Tirthendu Sarkar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 1bbc04d commit 1c9ba9c
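The index change is the subtle part of this commit, so a compilable toy model may help (illustrative only, not driver code; the array and helper names are invented): next_to_process advances once per hardware buffer, while next_to_clean snaps forward to the start of the next packet only once the EOP buffer has been handled.

#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 8	/* power of two, as in the real ring */

int main(void)
{
	/* eop[i] == true means buffer i ends a packet; buffers 1..3
	 * below model one 3-buffer multi-buffer frame.
	 */
	bool eop[RING_SIZE] = { true, false, false, true, true };
	unsigned int count_mask = RING_SIZE - 1;
	unsigned int next_to_clean = 0, next_to_process = 0;

	for (int i = 0; i < 5; i++) {
		bool is_eop = eop[next_to_process];

		next_to_process = (next_to_process + 1) & count_mask;
		if (!is_eop)
			continue;	/* frag: keep collecting */

		/* EOP handled: the whole packet is consumed */
		next_to_clean = next_to_process;
		printf("packet done: ntc=%u ntp=%u\n",
		       next_to_clean, next_to_process);
	}
	return 0;
}

For single-buffer packets every iteration hits EOP, so the two indices stay equal, which matches the pre-patch behaviour described above.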

File tree

4 files changed: +84 −18 lines changed

drivers/net/ethernet/intel/i40e/i40e_main.c

Lines changed: 0 additions & 5 deletions

@@ -3585,11 +3585,6 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	if (ring->xsk_pool) {
 		ring->rx_buf_len =
 			xsk_pool_get_rx_frame_size(ring->xsk_pool);
-		/* For AF_XDP ZC, we disallow packets to span on
-		 * multiple buffers, thus letting us skip that
-		 * handling in the fast-path.
-		 */
-		chain_len = 1;
 		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 						 MEM_TYPE_XSK_BUFF_POOL,
 						 NULL);
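With the chain_len = 1 override gone, a packet larger than the pool's frame size is now spread across several Rx descriptors rather than being disallowed by the ZC path. A back-of-envelope check (plain C, not driver code; 2048 bytes is an assumed typical xsk_buff_pool frame size):

#include <stdio.h>

int main(void)
{
	unsigned int frame_size = 2048;	/* assumed xsk_pool_get_rx_frame_size() result */
	unsigned int pkt_len = 9000;	/* jumbo frame */
	unsigned int bufs = (pkt_len + frame_size - 1) / frame_size;

	printf("%u-byte packet -> %u ring buffers\n", pkt_len, bufs);	/* 5 */
	return 0;
}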

drivers/net/ethernet/intel/i40e/i40e_txrx.c

Lines changed: 2 additions & 2 deletions

@@ -2284,8 +2284,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
  * If the buffer is an EOP buffer, this function exits returning false,
  * otherwise return true indicating that this is in fact a non-EOP buffer.
  */
-static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
-			    union i40e_rx_desc *rx_desc)
+bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+		     union i40e_rx_desc *rx_desc)
 {
 	/* if we are the last buffer then there is nothing else to do */
 #define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
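The export itself is mechanical; for context, the helper's logic (paraphrased from the existing non-zc code around this hunk — the stats update is recalled from memory, not shown in the diff) is a test of the EOF status bit:

/* Paraphrase of i40e_is_non_eop(): false on the EOP buffer,
 * true (plus a stats bump) on a mid-packet buffer.
 */
bool i40e_is_non_eop(struct i40e_ring *rx_ring,
		     union i40e_rx_desc *rx_desc)
{
	if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
		return false;		/* EOF set: packet complete */

	rx_ring->rx_stats.non_eop_descs++;
	return true;
}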

drivers/net/ethernet/intel/i40e/i40e_txrx.h

Lines changed: 2 additions & 0 deletions

@@ -473,6 +473,8 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
 int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		  u32 flags);
+bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+		     union i40e_rx_desc *rx_desc);
 
 /**
  * i40e_get_head - Retrieve head from head writeback

drivers/net/ethernet/intel/i40e/i40e_xsk.c

Lines changed: 80 additions & 11 deletions

@@ -294,8 +294,14 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
 {
 	unsigned int totalsize = xdp->data_end - xdp->data_meta;
 	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo = NULL;
 	struct sk_buff *skb;
+	u32 nr_frags = 0;
 
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
 	net_prefetch(xdp->data_meta);
 
 	/* allocate a skb to store the frags */

@@ -312,6 +318,28 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
 		__skb_pull(skb, metasize);
 	}
 
+	if (likely(!xdp_buff_has_frags(xdp)))
+		goto out;
+
+	for (int i = 0; i < nr_frags; i++) {
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
+		skb_frag_t *frag = &sinfo->frags[i];
+		struct page *page;
+		void *addr;
+
+		page = dev_alloc_page();
+		if (!page) {
+			dev_kfree_skb(skb);
+			return NULL;
+		}
+		addr = page_to_virt(page);
+
+		memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
+
+		__skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
+					   addr, 0, skb_frag_size(frag));
+	}
+
 out:
 	xsk_buff_free(xdp);
 	return skb;
@@ -322,14 +350,13 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
 				      union i40e_rx_desc *rx_desc,
 				      unsigned int *rx_packets,
 				      unsigned int *rx_bytes,
-				      unsigned int size,
 				      unsigned int xdp_res,
 				      bool *failure)
 {
 	struct sk_buff *skb;
 
 	*rx_packets = 1;
-	*rx_bytes = size;
+	*rx_bytes = xdp_get_buff_len(xdp_buff);
 
 	if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX)
 		return;

@@ -363,7 +390,6 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
 		return;
 	}
 
-	*rx_bytes = skb->len;
 	i40e_process_skb_fields(rx_ring, rx_desc, skb);
 	napi_gro_receive(&rx_ring->q_vector->napi, skb);
 	return;
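Byte accounting now happens before the XDP verdict is inspected, using xdp_get_buff_len() so that frag bytes are counted as well as the linear part; this is also why the *rx_bytes = skb->len assignment above becomes redundant and is dropped. Conceptually the helper computes the following (a simplified stand-alone model; the real struct xdp_buff fields differ):

/* Toy stand-in for struct xdp_buff; field names are illustrative. */
struct xdp_buff_sketch {
	unsigned char *data;
	unsigned char *data_end;
	int has_frags;
	unsigned int frags_size;	/* cf. sinfo->xdp_frags_size */
};

unsigned int xdp_buff_len_sketch(const struct xdp_buff_sketch *xdp)
{
	unsigned int len = xdp->data_end - xdp->data;	/* linear part */

	if (xdp->has_frags)
		len += xdp->frags_size;	/* bytes accumulated per frag */
	return len;
}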
@@ -374,6 +400,31 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
 	WARN_ON_ONCE(1);
 }
 
+static int
+i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
+		  struct xdp_buff *xdp, const unsigned int size)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);
+
+	if (!xdp_buff_has_frags(first)) {
+		sinfo->nr_frags = 0;
+		sinfo->xdp_frags_size = 0;
+		xdp_buff_set_frags_flag(first);
+	}
+
+	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+		xsk_buff_free(first);
+		return -ENOMEM;
+	}
+
+	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
+				   virt_to_page(xdp->data_hard_start), 0, size);
+	sinfo->xdp_frags_size += size;
+	xsk_buff_add_frag(xdp);
+
+	return 0;
+}
+
 /**
  * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
  * @rx_ring: Rx ring
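The commit message notes that frags are chained into skb_shared_info "for possible xdp_prog use". On the BPF side, only programs declared as frags-aware will see such multi-buffer packets; a minimal example using standard libbpf conventions (not part of this patch; the length policy is an arbitrary placeholder):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* The "xdp.frags" section name makes libbpf load the program with
 * BPF_F_XDP_HAS_FRAGS, allowing attachment to rings that build
 * multi-buffer xdp_buffs like the ones assembled above.
 */
SEC("xdp.frags")
int xdp_len_check(struct xdp_md *ctx)
{
	/* Total length including frags, cf. xdp_get_buff_len() above. */
	__u64 len = bpf_xdp_get_buff_len(ctx);

	return len > 3000 ? XDP_DROP : XDP_PASS;
}

char _license[] SEC("license") = "GPL";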
@@ -384,13 +435,18 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
 int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 next_to_process = rx_ring->next_to_process;
 	u16 next_to_clean = rx_ring->next_to_clean;
 	u16 count_mask = rx_ring->count - 1;
 	unsigned int xdp_res, xdp_xmit = 0;
+	struct xdp_buff *first = NULL;
 	struct bpf_prog *xdp_prog;
 	bool failure = false;
 	u16 cleaned_count;
 
+	if (next_to_process != next_to_clean)
+		first = *i40e_rx_bi(rx_ring, next_to_clean);
+
 	/* NB! xdp_prog will always be !NULL, due to the fact that
 	 * this path is enabled by setting an XDP program.
 	 */

@@ -404,7 +460,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 		unsigned int size;
 		u64 qword;
 
-		rx_desc = I40E_RX_DESC(rx_ring, next_to_clean);
+		rx_desc = I40E_RX_DESC(rx_ring, next_to_process);
 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
 
 		/* This memory barrier is needed to keep us from reading

@@ -417,9 +473,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 			i40e_clean_programming_status(rx_ring,
 						      rx_desc->raw.qword[0],
 						      qword);
-			bi = *i40e_rx_bi(rx_ring, next_to_clean);
+			bi = *i40e_rx_bi(rx_ring, next_to_process);
 			xsk_buff_free(bi);
-			next_to_clean = (next_to_clean + 1) & count_mask;
+			next_to_process = (next_to_process + 1) & count_mask;
 			continue;
 		}
 

@@ -428,22 +484,35 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 		if (!size)
 			break;
 
-		bi = *i40e_rx_bi(rx_ring, next_to_clean);
+		bi = *i40e_rx_bi(rx_ring, next_to_process);
 		xsk_buff_set_size(bi, size);
 		xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
 
-		xdp_res = i40e_run_xdp_zc(rx_ring, bi, xdp_prog);
-		i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets,
-					  &rx_bytes, size, xdp_res, &failure);
+		if (!first)
+			first = bi;
+		else if (i40e_add_xsk_frag(rx_ring, first, bi, size))
+			break;
+
+		next_to_process = (next_to_process + 1) & count_mask;
+
+		if (i40e_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
+		i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
+					  &rx_bytes, xdp_res, &failure);
+		first->flags = 0;
+		next_to_clean = next_to_process;
 		if (failure)
 			break;
 		total_rx_packets += rx_packets;
 		total_rx_bytes += rx_bytes;
 		xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
-		next_to_clean = (next_to_clean + 1) & count_mask;
+		first = NULL;
 	}
 
 	rx_ring->next_to_clean = next_to_clean;
+	rx_ring->next_to_process = next_to_process;
 	cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask;
 
 	if (cleaned_count >= I40E_RX_BUFFER_WRITE)
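From user space, a multi-buffer packet shows up on the AF_XDP Rx ring as consecutive descriptors, with XDP_PKT_CONTD set in desc->options on every descriptor except the last (the socket must be bound with the XDP_USE_SG flag for this to be negotiated; both come from the AF_XDP multi-buffer UAPI this series builds on). A minimal consumer loop using libxdp's xsk helpers might look like this (ring setup omitted; handle_frag() and handle_packet_end() are application placeholders):

#include <linux/if_xdp.h>	/* XDP_PKT_CONTD */
#include <xdp/xsk.h>		/* xsk_ring_cons__* helpers */

void handle_frag(__u64 addr, __u32 len);	/* app-specific */
void handle_packet_end(void);			/* app-specific */

static void rx_multibuf(struct xsk_ring_cons *rx, __u32 budget)
{
	__u32 idx;
	__u32 n = xsk_ring_cons__peek(rx, budget, &idx);

	for (__u32 i = 0; i < n; i++) {
		const struct xdp_desc *desc =
			xsk_ring_cons__rx_desc(rx, idx + i);

		handle_frag(desc->addr, desc->len);
		if (!(desc->options & XDP_PKT_CONTD))
			handle_packet_end();	/* EOP reached */
	}
	xsk_ring_cons__release(rx, n);
}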
