
Commit 7ed2bc8

vladimiroltean authored and davem330 committed
net: enetc: add support for XDP_TX
For reflecting packets back into the interface they came from, we create an array of TX software BDs derived from the RX software BDs. Therefore, we need to extend the TX software BD structure to contain most of the stuff that's already present in the RX software BD structure, for reasons that will become evident in a moment.

For a frame with the XDP_TX verdict, we don't reuse any buffer right away as we do for XDP_DROP (the same page half) or XDP_PASS (the other page half, same as the skb code path). Because the buffer transfers ownership from the RX ring to the TX ring, reusing any page half right away is very dangerous. So what we can do is recycle the same page half as soon as TX is complete.

The code path is:
enetc_poll
-> enetc_clean_rx_ring_xdp
   -> enetc_xdp_tx
   -> enetc_refill_rx_ring
(time passes, another MSI interrupt is raised)
enetc_poll
-> enetc_clean_tx_ring
   -> enetc_recycle_xdp_tx_buff

But that creates a problem, because there is a potentially large time window between enetc_xdp_tx and enetc_recycle_xdp_tx_buff, a period in which we'll have fewer and fewer RX buffers.

Basically, when the ship starts sinking, the knee-jerk reaction is to let enetc_refill_rx_ring do what it does for the standard skb code path (refill every 16 consumed buffers), but that turns out to be very inefficient. The problem is that we have no rx_swbd->page at our disposal from the enetc_reuse_page path, so enetc_refill_rx_ring would have to call enetc_new_page for every buffer that we refill (if we choose to refill at this early stage). That only makes the problem worse, because page allocation is an expensive process, and CPU time is exactly what we're lacking.

Additionally, there is an even bigger problem: if we let enetc_refill_rx_ring top up the ring's buffers again from the RX path, remember that the buffers sent to transmission haven't disappeared anywhere. They will eventually be sent, processed in enetc_clean_tx_ring, and an attempt will be made to recycle them. But surprise, the RX ring is already full of new buffers, because we were premature in deciding that we should refill. So not only did we take the expensive decision of allocating new pages, but now we must throw away perfectly good and reusable buffers.

So what we do instead is implement an elastic refill mechanism, which keeps track of the number of in-flight XDP_TX buffer descriptors. We top up the RX ring only up to the total ring capacity minus the number of BDs that are in flight (because we know that those BDs will return to us eventually).

The enetc driver manages 1 RX ring per CPU, and the default TX ring management is the same. So we do XDP_TX towards the TX ring of the same index, because it is affined to the same CPU. This will probably not produce great results when we have a tc-taprio/tc-mqprio qdisc on the interface, because in that case the number of TX rings might be greater, but I didn't add any checks for that yet (mostly because I didn't know what checks to add).

It should also be noted that we need to change the DMA mapping direction for RX buffers, since they may now be reflected into the TX ring of the same device. We choose to use DMA_BIDIRECTIONAL instead of unmapping and remapping as DMA_TO_DEVICE, because performance is better this way.

Signed-off-by: Vladimir Oltean <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
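As a quick illustration of the elastic refill rule described above, here is a standalone sketch in plain userspace C; it is not driver code. The struct, its field names and the example numbers are invented for the illustration. Only the arithmetic is meant to mirror the free-slot count (the enetc_bd_unused() idea) and the cleaned_cnt > xdp_tx_in_flight gate that the patch adds to enetc_clean_rx_ring_xdp() in the diff below.

/* Standalone model of the elastic refill accounting: the RX ring is only
 * topped up to its capacity minus the number of XDP_TX buffers currently
 * in flight, since those buffers are expected to be recycled back into
 * the ring when TX completes. All names and numbers are illustrative.
 */
#include <stdio.h>

struct rx_ring_model {
        int bd_count;          /* total ring capacity, in BDs */
        int next_to_use;       /* producer index (refill writes here) */
        int next_to_clean;     /* consumer index (NAPI reads here) */
        int xdp_tx_in_flight;  /* BDs lent to the TX ring, not yet recycled */
};

/* Free slots between producer and consumer (ring-buffer arithmetic). */
static int bd_unused(const struct rx_ring_model *r)
{
        if (r->next_to_clean > r->next_to_use)
                return r->next_to_clean - r->next_to_use - 1;
        return r->bd_count + r->next_to_clean - r->next_to_use - 1;
}

/* How many fresh pages an elastic refill would allocate right now. */
static int elastic_refill_budget(const struct rx_ring_model *r, int cleaned_cnt)
{
        if (cleaned_cnt <= r->xdp_tx_in_flight)
                return 0; /* everything consumed is coming back; don't allocate */
        return bd_unused(r) - r->xdp_tx_in_flight;
}

int main(void)
{
        struct rx_ring_model r = {
                .bd_count = 512, .next_to_use = 100, .next_to_clean = 164,
                .xdp_tx_in_flight = 40,
        };
        int cleaned_cnt = 64; /* buffers consumed in this NAPI poll */

        printf("free BDs: %d, refill budget: %d\n",
               bd_unused(&r), elastic_refill_budget(&r, cleaned_cnt));
        return 0;
}

For the example values this prints "free BDs: 63, refill budget: 23", i.e. the ring is refilled only with buffers that are not already expected back from the TX ring.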
1 parent d1b1510 commit 7ed2bc8

3 files changed: +228 −25 lines


drivers/net/ethernet/freescale/enetc/enetc.c

Lines changed: 193 additions & 24 deletions
@@ -8,21 +8,20 @@
 #include <linux/vmalloc.h>
 #include <net/pkt_sched.h>

-/* ENETC overhead: optional extension BD + 1 BD gap */
-#define ENETC_TXBDS_NEEDED(val) ((val) + 2)
-/* max # of chained Tx BDs is 15, including head and extension BD */
-#define ENETC_MAX_SKB_FRAGS 13
-#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
-
 static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
                                 struct enetc_tx_swbd *tx_swbd)
 {
+        /* For XDP_TX, pages come from RX, whereas for the other contexts where
+         * we have is_dma_page_set, those come from skb_frag_dma_map. We need
+         * to match the DMA mapping length, so we need to differentiate those.
+         */
         if (tx_swbd->is_dma_page)
                 dma_unmap_page(tx_ring->dev, tx_swbd->dma,
-                               tx_swbd->len, DMA_TO_DEVICE);
+                               tx_swbd->is_xdp_tx ? PAGE_SIZE : tx_swbd->len,
+                               tx_swbd->dir);
         else
                 dma_unmap_single(tx_ring->dev, tx_swbd->dma,
-                                 tx_swbd->len, DMA_TO_DEVICE);
+                                 tx_swbd->len, tx_swbd->dir);
         tx_swbd->dma = 0;
 }

@@ -38,6 +37,13 @@ static void enetc_free_tx_skb(struct enetc_bdr *tx_ring,
         }
 }

+/* Let H/W know BD ring has been updated */
+static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring)
+{
+        /* includes wmb() */
+        enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use);
+}
+
 static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
                               int active_offloads)
 {
@@ -68,6 +74,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
         tx_swbd->dma = dma;
         tx_swbd->len = len;
         tx_swbd->is_dma_page = 0;
+        tx_swbd->dir = DMA_TO_DEVICE;
         count++;

         do_vlan = skb_vlan_tag_present(skb);
@@ -150,6 +157,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
                 tx_swbd->dma = dma;
                 tx_swbd->len = len;
                 tx_swbd->is_dma_page = 1;
+                tx_swbd->dir = DMA_TO_DEVICE;
                 count++;
         }

@@ -166,8 +174,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,

         skb_tx_timestamp(skb);

-        /* let H/W know BD ring has been updated */
-        enetc_wr_reg_hot(tx_ring->tpir, i); /* includes wmb() */
+        enetc_update_tx_ring_tail(tx_ring);

         return count;

@@ -320,6 +327,43 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
         }
 }

+static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
+                                      struct enetc_tx_swbd *tx_swbd)
+{
+        struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
+        struct enetc_bdr *rx_ring = priv->rx_ring[tx_ring->index];
+        struct enetc_rx_swbd rx_swbd = {
+                .dma = tx_swbd->dma,
+                .page = tx_swbd->page,
+                .page_offset = tx_swbd->page_offset,
+                .dir = tx_swbd->dir,
+                .len = tx_swbd->len,
+        };
+
+        if (likely(enetc_swbd_unused(rx_ring))) {
+                enetc_reuse_page(rx_ring, &rx_swbd);
+
+                /* sync for use by the device */
+                dma_sync_single_range_for_device(rx_ring->dev, rx_swbd.dma,
+                                                 rx_swbd.page_offset,
+                                                 ENETC_RXB_DMA_SIZE_XDP,
+                                                 rx_swbd.dir);
+
+                rx_ring->stats.recycles++;
+        } else {
+                /* RX ring is already full, we need to unmap and free the
+                 * page, since there's nothing useful we can do with it.
+                 */
+                rx_ring->stats.recycle_failures++;
+
+                dma_unmap_page(rx_ring->dev, rx_swbd.dma, PAGE_SIZE,
+                               rx_swbd.dir);
+                __free_page(rx_swbd.page);
+        }
+
+        rx_ring->xdp.xdp_tx_in_flight--;
+}
+
 static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 {
         struct net_device *ndev = tx_ring->ndev;
@@ -351,7 +395,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
                         }
                 }

-                if (likely(tx_swbd->dma))
+                if (tx_swbd->is_xdp_tx)
+                        enetc_recycle_xdp_tx_buff(tx_ring, tx_swbd);
+                else if (likely(tx_swbd->dma))
                         enetc_unmap_tx_buff(tx_ring, tx_swbd);

                 if (tx_swbd->skb) {
@@ -405,14 +451,18 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 static bool enetc_new_page(struct enetc_bdr *rx_ring,
                            struct enetc_rx_swbd *rx_swbd)
 {
+        bool xdp = !!(rx_ring->xdp.prog);
         struct page *page;
         dma_addr_t addr;

         page = dev_alloc_page();
         if (unlikely(!page))
                 return false;

-        addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+        /* For XDP_TX, we forgo dma_unmap -> dma_map */
+        rx_swbd->dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+
+        addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, rx_swbd->dir);
         if (unlikely(dma_mapping_error(rx_ring->dev, addr))) {
                 __free_page(page);

@@ -536,14 +586,18 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 #endif
 }

+/* This gets called during the non-XDP NAPI poll cycle as well as on XDP_PASS,
+ * so it needs to work with both DMA_FROM_DEVICE as well as DMA_BIDIRECTIONAL
+ * mapped buffers.
+ */
 static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
                                                int i, u16 size)
 {
         struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[i];

         dma_sync_single_range_for_cpu(rx_ring->dev, rx_swbd->dma,
                                       rx_swbd->page_offset,
-                                      size, DMA_FROM_DEVICE);
+                                      size, rx_swbd->dir);
         return rx_swbd;
 }

@@ -561,10 +615,10 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
                 /* sync for use by the device */
                 dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
                                                  rx_swbd->page_offset,
-                                                 buffer_size, DMA_FROM_DEVICE);
+                                                 buffer_size, rx_swbd->dir);
         } else {
-                dma_unmap_page(rx_ring->dev, rx_swbd->dma,
-                               PAGE_SIZE, DMA_FROM_DEVICE);
+                dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
+                               rx_swbd->dir);
         }

         rx_swbd->page = NULL;
@@ -718,13 +772,71 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
         return rx_frm_cnt;
 }

+static void enetc_xdp_map_tx_buff(struct enetc_bdr *tx_ring, int i,
+                                  struct enetc_tx_swbd *tx_swbd,
+                                  int frm_len)
+{
+        union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i);
+
+        prefetchw(txbd);
+
+        enetc_clear_tx_bd(txbd);
+        txbd->addr = cpu_to_le64(tx_swbd->dma + tx_swbd->page_offset);
+        txbd->buf_len = cpu_to_le16(tx_swbd->len);
+        txbd->frm_len = cpu_to_le16(frm_len);
+
+        memcpy(&tx_ring->tx_swbd[i], tx_swbd, sizeof(*tx_swbd));
+}
+
+/* Puts in the TX ring one XDP frame, mapped as an array of TX software buffer
+ * descriptors.
+ */
+static bool enetc_xdp_tx(struct enetc_bdr *tx_ring,
+                         struct enetc_tx_swbd *xdp_tx_arr, int num_tx_swbd)
+{
+        struct enetc_tx_swbd *tmp_tx_swbd = xdp_tx_arr;
+        int i, k, frm_len = tmp_tx_swbd->len;
+
+        if (unlikely(enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(num_tx_swbd)))
+                return false;
+
+        while (unlikely(!tmp_tx_swbd->is_eof)) {
+                tmp_tx_swbd++;
+                frm_len += tmp_tx_swbd->len;
+        }
+
+        i = tx_ring->next_to_use;
+
+        for (k = 0; k < num_tx_swbd; k++) {
+                struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[k];
+
+                enetc_xdp_map_tx_buff(tx_ring, i, xdp_tx_swbd, frm_len);
+
+                /* last BD needs 'F' bit set */
+                if (xdp_tx_swbd->is_eof) {
+                        union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i);
+
+                        txbd->flags = ENETC_TXBD_FLAGS_F;
+                }
+
+                enetc_bdr_idx_inc(tx_ring, &i);
+        }
+
+        tx_ring->next_to_use = i;
+
+        return true;
+}
+
 static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
                                      struct xdp_buff *xdp_buff, u16 size)
 {
         struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
         void *hard_start = page_address(rx_swbd->page) + rx_swbd->page_offset;
         struct skb_shared_info *shinfo;

+        /* To be used for XDP_TX */
+        rx_swbd->len = size;
+
         xdp_prepare_buff(xdp_buff, hard_start - rx_ring->buffer_offset,
                          rx_ring->buffer_offset, size, false);

@@ -739,6 +851,9 @@ static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
         struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
         skb_frag_t *frag = &shinfo->frags[shinfo->nr_frags];

+        /* To be used for XDP_TX */
+        rx_swbd->len = size;
+
         skb_frag_off_set(frag, rx_swbd->page_offset);
         skb_frag_size_set(frag, size);
         __skb_frag_set_page(frag, rx_swbd->page);
@@ -780,15 +895,48 @@ static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring,
 {
         enetc_reuse_page(rx_ring, rx_swbd);

-        /* sync for use by the device */
         dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
                                          rx_swbd->page_offset,
                                          ENETC_RXB_DMA_SIZE_XDP,
-                                         DMA_FROM_DEVICE);
+                                         rx_swbd->dir);

         rx_swbd->page = NULL;
 }

+/* Convert RX buffer descriptors to TX buffer descriptors. These will be
+ * recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the
+ * RX software BDs because the ownership of the buffer no longer belongs to the
+ * RX ring, so enetc_refill_rx_ring may not reuse rx_swbd->page.
+ */
+static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr,
+                                        struct enetc_bdr *rx_ring,
+                                        int rx_ring_first, int rx_ring_last)
+{
+        int n = 0;
+
+        for (; rx_ring_first != rx_ring_last;
+             n++, enetc_bdr_idx_inc(rx_ring, &rx_ring_first)) {
+                struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first];
+                struct enetc_tx_swbd *tx_swbd = &xdp_tx_arr[n];
+
+                /* No need to dma_map, we already have DMA_BIDIRECTIONAL */
+                tx_swbd->dma = rx_swbd->dma;
+                tx_swbd->dir = rx_swbd->dir;
+                tx_swbd->page = rx_swbd->page;
+                tx_swbd->page_offset = rx_swbd->page_offset;
+                tx_swbd->len = rx_swbd->len;
+                tx_swbd->is_dma_page = true;
+                tx_swbd->is_xdp_tx = true;
+                tx_swbd->is_eof = false;
+                memset(rx_swbd, 0, sizeof(*rx_swbd));
+        }
+
+        /* We rely on caller providing an rx_ring_last > rx_ring_first */
+        xdp_tx_arr[n - 1].is_eof = true;
+
+        return n;
+}
+
 static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
                            int rx_ring_last)
 {
@@ -804,6 +952,10 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
                                    struct napi_struct *napi, int work_limit,
                                    struct bpf_prog *prog)
 {
+        struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0};
+        struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
+        struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index];
+        int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0;
         int rx_frm_cnt = 0, rx_byte_cnt = 0;
         int cleaned_cnt, i;
         u32 xdp_act;
@@ -819,10 +971,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
                 struct sk_buff *skb;
                 u32 bd_status;

-                if (cleaned_cnt >= ENETC_RXBD_BUNDLE)
-                        cleaned_cnt -= enetc_refill_rx_ring(rx_ring,
-                                                            cleaned_cnt);
-
                 rxbd = enetc_rxbd(rx_ring, i);
                 bd_status = le32_to_cpu(rxbd->r.lstatus);
                 if (!bd_status)
@@ -865,6 +1013,20 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,

                         napi_gro_receive(napi, skb);
                         break;
+                case XDP_TX:
+                        xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr,
+                                                                     rx_ring,
+                                                                     orig_i, i);
+
+                        if (!enetc_xdp_tx(tx_ring, xdp_tx_arr, xdp_tx_bd_cnt)) {
+                                enetc_xdp_drop(rx_ring, orig_i, i);
+                                tx_ring->stats.xdp_tx_drops++;
+                        } else {
+                                tx_ring->stats.xdp_tx += xdp_tx_bd_cnt;
+                                rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt;
+                                xdp_tx_frm_cnt++;
+                        }
+                        break;
                 default:
                         bpf_warn_invalid_xdp_action(xdp_act);
                 }
@@ -877,6 +1039,13 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
         rx_ring->stats.packets += rx_frm_cnt;
         rx_ring->stats.bytes += rx_byte_cnt;

+        if (xdp_tx_frm_cnt)
+                enetc_update_tx_ring_tail(tx_ring);
+
+        if (cleaned_cnt > rx_ring->xdp.xdp_tx_in_flight)
+                enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring) -
+                                     rx_ring->xdp.xdp_tx_in_flight);
+
         return rx_frm_cnt;
 }

@@ -1141,8 +1310,8 @@ static void enetc_free_rx_ring(struct enetc_bdr *rx_ring)
                 if (!rx_swbd->page)
                         continue;

-                dma_unmap_page(rx_ring->dev, rx_swbd->dma,
-                               PAGE_SIZE, DMA_FROM_DEVICE);
+                dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
+                               rx_swbd->dir);
                 __free_page(rx_swbd->page);
                 rx_swbd->page = NULL;
         }
