
Commit fd8e403

praveenkaligineedi authored and davem330 committed
gve: Add AF_XDP zero-copy support for GQI-QPL format
Adding AF_XDP zero-copy support.

Note: Although these changes support AF_XDP socket in zero-copy mode,
there is still a copy happening within the driver between XSK buffer
pool and QPL bounce buffers in GQI-QPL format.

In GQI-QPL queue format, the driver needs to allocate a fixed size
memory, the size specified by vNIC device, for RX/TX and register this
memory as a bounce buffer with the vNIC device when a queue is created.
The number of pages in the bounce buffer is limited and the pages need
to be made available to the vNIC by copying the RX data out to prevent
head-of-line blocking. Therefore, we cannot pass the XSK buffer pool
to the vNIC.

The number of copies on RX path from the bounce buffer to XSK buffer is
2 for AF_XDP copy mode (bounce buffer -> allocated page frag -> XSK
buffer) and 1 for AF_XDP zero-copy mode (bounce buffer -> XSK buffer).

This patch contains the following changes:
1) Enable and disable XSK buffer pool
2) Copy XDP packets from QPL bounce buffers to XSK buffer on rx
3) Copy XDP packets from XSK buffer to QPL bounce buffers and ring the
   doorbell as part of XDP TX napi poll
4) ndo_xsk_wakeup callback support

Signed-off-by: Praveen Kaligineedi <[email protected]>
Reviewed-by: Jeroen de Borst <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 39a7f4a commit fd8e403
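
For context, userspace opts into the zero-copy path this commit enables by binding an AF_XDP socket with the XDP_ZEROCOPY flag; the kernel then calls into the driver hooks added below (XDP_SETUP_XSK_POOL and ndo_xsk_wakeup). The following is a minimal sketch using the libxdp helpers; the frame count, alignment, and error handling are illustrative assumptions, not part of this patch.

/* Sketch: bind an AF_XDP socket in zero-copy mode to one gve queue.
 * Error unwinding is elided; NUM_FRAMES/FRAME_SIZE are assumptions. */
#include <xdp/xsk.h>		/* libxdp (formerly <bpf/xsk.h> in libbpf) */
#include <linux/if_xdp.h>
#include <stdlib.h>

#define NUM_FRAMES 4096
#define FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE

static struct xsk_socket *open_zc_socket(const char *ifname, __u32 qid,
					 struct xsk_ring_prod *fill,
					 struct xsk_ring_cons *comp,
					 struct xsk_ring_cons *rx,
					 struct xsk_ring_prod *tx,
					 void **umem_area)
{
	struct xsk_socket_config cfg = {
		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		/* XDP_ZEROCOPY is what this commit enables for gve;
		 * XDP_USE_NEED_WAKEUP pairs with gve_xsk_wakeup() below. */
		.bind_flags = XDP_ZEROCOPY | XDP_USE_NEED_WAKEUP,
	};
	struct xsk_umem *umem;
	struct xsk_socket *xsk;

	if (posix_memalign(umem_area, 4096, NUM_FRAMES * FRAME_SIZE))
		return NULL;
	if (xsk_umem__create(&umem, *umem_area, NUM_FRAMES * FRAME_SIZE,
			     fill, comp, NULL))
		return NULL;
	if (xsk_socket__create(&xsk, ifname, qid, umem, rx, tx, &cfg))
		return NULL;
	return xsk;
}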

File tree

5 files changed (+274, -9 lines)


drivers/net/ethernet/google/gve/gve.h

Lines changed: 7 additions & 0 deletions
@@ -248,6 +248,8 @@ struct gve_rx_ring {

 	/* XDP stuff */
 	struct xdp_rxq_info xdp_rxq;
+	struct xdp_rxq_info xsk_rxq;
+	struct xsk_buff_pool *xsk_pool;
 	struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
 };

@@ -275,6 +277,7 @@ struct gve_tx_buffer_state {
 	};
 	struct {
 		u16 size; /* size of xmitted xdp pkt */
+		u8 is_xsk; /* xsk buff */
 	} xdp;
 	union {
 		struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */

@@ -469,6 +472,10 @@ struct gve_tx_ring {
 	dma_addr_t q_resources_bus; /* dma address of the queue resources */
 	dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
 	struct u64_stats_sync statss; /* sync stats for 32bit archs */
+	struct xsk_buff_pool *xsk_pool;
+	u32 xdp_xsk_wakeup;
+	u32 xdp_xsk_done;
+	u64 xdp_xsk_sent;
 	u64 xdp_xmit;
 	u64 xdp_xmit_errors;
 } ____cacheline_aligned;
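
The new gve_tx_ring fields above (xsk_pool, xdp_xsk_wakeup, xdp_xsk_done, xdp_xsk_sent) back the TX half of the feature, but the TX-side diff is not rendered on this page. The following is therefore only a plausible sketch, not the patch's code, of how a GQI-QPL TX napi poll might drain the XSK TX ring with the generic XSK pool API; gve_tx_fill_from_buf() and gve_tx_ring_doorbell() are assumed helper names.

/* Hypothetical sketch of the TX copy path described in the commit
 * message: XSK buffer -> QPL bounce buffer, then ring the doorbell. */
#include <net/xdp_sock_drv.h>

static int gve_xsk_tx_sketch(struct gve_priv *priv, struct gve_tx_ring *tx,
			     int budget)
{
	struct xdp_desc desc;
	int sent = 0;

	while (sent < budget && xsk_tx_peek_desc(tx->xsk_pool, &desc)) {
		void *data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr);

		/* The one unavoidable copy in GQI-QPL format: out of the
		 * XSK buffer into the registered bounce buffer. */
		gve_tx_fill_from_buf(tx, data, desc.len); /* assumed helper */
		sent++;
	}
	if (sent) {
		u64_stats_update_begin(&tx->statss);
		tx->xdp_xsk_sent += sent;	/* stat added by this patch */
		u64_stats_update_end(&tx->statss);
		xsk_tx_release(tx->xsk_pool);	/* return descs to userspace */
		gve_tx_ring_doorbell(priv, tx);	/* assumed helper */
	}
	return sent;
}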

drivers/net/ethernet/google/gve/gve_ethtool.c

Lines changed: 9 additions & 5 deletions
@@ -62,8 +62,8 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
 static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
 	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
 	"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
-	"tx_dma_mapping_error[%u]",
-	"tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
+	"tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]",
+	"tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
 };

 static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {

@@ -381,13 +381,17 @@ gve_get_ethtool_stats(struct net_device *netdev,
 				data[i++] = value;
 			}
 		}
+		/* XDP xsk counters */
+		data[i++] = tx->xdp_xsk_wakeup;
+		data[i++] = tx->xdp_xsk_done;
 		do {
 			start = u64_stats_fetch_begin(&priv->tx[ring].statss);
-			data[i] = tx->xdp_xmit;
-			data[i + 1] = tx->xdp_xmit_errors;
+			data[i] = tx->xdp_xsk_sent;
+			data[i + 1] = tx->xdp_xmit;
+			data[i + 2] = tx->xdp_xmit_errors;
 		} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
 					       start));
-		i += 2; /* XDP tx counters */
+		i += 3; /* XDP tx counters */
 	}
 } else {
 	i += num_tx_queues * NUM_GVE_TX_CNTS;
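
Note the split above: xdp_xsk_wakeup and xdp_xsk_done are plain u32 counters read outside the retry loop, while the 64-bit xdp_xsk_sent, xdp_xmit, and xdp_xmit_errors are sampled under the u64_stats seqcount so that 32-bit readers cannot observe torn values. A minimal sketch of that pattern, assuming a ring struct with one u64 counter:

/* Writer runs on the datapath, reader from ethtool. On 64-bit kernels
 * the fetch loop is a no-op; on 32-bit it guards against torn reads. */
#include <linux/u64_stats_sync.h>

struct ring_stats {
	struct u64_stats_sync syncp;
	u64 pkts;
};

static void ring_stats_inc(struct ring_stats *s)	/* writer */
{
	u64_stats_update_begin(&s->syncp);
	s->pkts++;
	u64_stats_update_end(&s->syncp);
}

static u64 ring_stats_read(struct ring_stats *s)	/* reader */
{
	unsigned int start;
	u64 pkts;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		pkts = s->pkts;
	} while (u64_stats_fetch_retry(&s->syncp, start));
	return pkts;
}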

drivers/net/ethernet/google/gve/gve_main.c

Lines changed: 173 additions & 1 deletion
@@ -17,6 +17,7 @@
 #include <linux/utsname.h>
 #include <linux/version.h>
 #include <net/sch_generic.h>
+#include <net/xdp_sock_drv.h>
 #include "gve.h"
 #include "gve_dqo.h"
 #include "gve_adminq.h"

@@ -1188,6 +1189,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 	struct gve_rx_ring *rx;
 	int err = 0;
 	int i, j;
+	u32 tx_qid;

 	if (!priv->num_xdp_queues)
 		return 0;

@@ -1204,6 +1206,24 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 					 MEM_TYPE_PAGE_SHARED, NULL);
 		if (err)
 			goto err;
+		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
+		if (rx->xsk_pool) {
+			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
+					       napi->napi_id);
+			if (err)
+				goto err;
+			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+							 MEM_TYPE_XSK_BUFF_POOL, NULL);
+			if (err)
+				goto err;
+			xsk_pool_set_rxq_info(rx->xsk_pool,
+					      &rx->xsk_rxq);
+		}
+	}
+
+	for (i = 0; i < priv->num_xdp_queues; i++) {
+		tx_qid = gve_xdp_tx_queue_id(priv, i);
+		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
 	}
 	return 0;

@@ -1212,13 +1232,15 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 		rx = &priv->rx[j];
 		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
 			xdp_rxq_info_unreg(&rx->xdp_rxq);
+		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+			xdp_rxq_info_unreg(&rx->xsk_rxq);
 	}
 	return err;
 }

 static void gve_unreg_xdp_info(struct gve_priv *priv)
 {
-	int i;
+	int i, tx_qid;

 	if (!priv->num_xdp_queues)
 		return;

@@ -1227,6 +1249,15 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
 		struct gve_rx_ring *rx = &priv->rx[i];

 		xdp_rxq_info_unreg(&rx->xdp_rxq);
+		if (rx->xsk_pool) {
+			xdp_rxq_info_unreg(&rx->xsk_rxq);
+			rx->xsk_pool = NULL;
+		}
+	}
+
+	for (i = 0; i < priv->num_xdp_queues; i++) {
+		tx_qid = gve_xdp_tx_queue_id(priv, i);
+		priv->tx[tx_qid].xsk_pool = NULL;
 	}
 }

@@ -1469,6 +1500,140 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
 	return err;
 }

+static int gve_xsk_pool_enable(struct net_device *dev,
+			       struct xsk_buff_pool *pool,
+			       u16 qid)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct napi_struct *napi;
+	struct gve_rx_ring *rx;
+	int tx_qid;
+	int err;
+
+	if (qid >= priv->rx_cfg.num_queues) {
+		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
+		return -EINVAL;
+	}
+	if (xsk_pool_get_rx_frame_size(pool) <
+	    priv->dev->max_mtu + sizeof(struct ethhdr)) {
+		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
+		return -EINVAL;
+	}
+
+	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
+			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	if (err)
+		return err;
+
+	/* If XDP prog is not installed, return */
+	if (!priv->xdp_prog)
+		return 0;
+
+	rx = &priv->rx[qid];
+	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
+	if (err)
+		goto err;
+
+	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+					 MEM_TYPE_XSK_BUFF_POOL, NULL);
+	if (err)
+		goto err;
+
+	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
+	rx->xsk_pool = pool;
+
+	tx_qid = gve_xdp_tx_queue_id(priv, qid);
+	priv->tx[tx_qid].xsk_pool = pool;
+
+	return 0;
+err:
+	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+		xdp_rxq_info_unreg(&rx->xsk_rxq);
+
+	xsk_pool_dma_unmap(pool,
+			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	return err;
+}
+
+static int gve_xsk_pool_disable(struct net_device *dev,
+				u16 qid)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct napi_struct *napi_rx;
+	struct napi_struct *napi_tx;
+	struct xsk_buff_pool *pool;
+	int tx_qid;
+
+	pool = xsk_get_pool_from_qid(dev, qid);
+	if (!pool)
+		return -EINVAL;
+	if (qid >= priv->rx_cfg.num_queues)
+		return -EINVAL;
+
+	/* If XDP prog is not installed, unmap DMA and return */
+	if (!priv->xdp_prog)
+		goto done;
+
+	tx_qid = gve_xdp_tx_queue_id(priv, qid);
+	if (!netif_running(dev)) {
+		priv->rx[qid].xsk_pool = NULL;
+		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+		priv->tx[tx_qid].xsk_pool = NULL;
+		goto done;
+	}
+
+	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
+	napi_disable(napi_rx); /* make sure current rx poll is done */
+
+	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
+	napi_disable(napi_tx); /* make sure current tx poll is done */
+
+	priv->rx[qid].xsk_pool = NULL;
+	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+	priv->tx[tx_qid].xsk_pool = NULL;
+	smp_mb(); /* Make sure it is visible to the workers on datapath */
+
+	napi_enable(napi_rx);
+	if (gve_rx_work_pending(&priv->rx[qid]))
+		napi_schedule(napi_rx);
+
+	napi_enable(napi_tx);
+	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
+		napi_schedule(napi_tx);
+
+done:
+	xsk_pool_dma_unmap(pool,
+			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	return 0;
+}
+
+static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+
+	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
+		return -EINVAL;
+
+	if (flags & XDP_WAKEUP_TX) {
+		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
+		struct napi_struct *napi =
+			&priv->ntfy_blocks[tx->ntfy_id].napi;
+
+		if (!napi_if_scheduled_mark_missed(napi)) {
+			/* Call local_bh_enable to trigger SoftIRQ processing */
+			local_bh_disable();
+			napi_schedule(napi);
+			local_bh_enable();
+		}
+
+		tx->xdp_xsk_wakeup++;
+	}
+
+	return 0;
+}
+
 static int verify_xdp_configuration(struct net_device *dev)
 {
 	struct gve_priv *priv = netdev_priv(dev);

@@ -1512,6 +1677,11 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return gve_set_xdp(priv, xdp->prog, xdp->extack);
+	case XDP_SETUP_XSK_POOL:
+		if (xdp->xsk.pool)
+			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
+		else
+			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
 	default:
 		return -EINVAL;
 	}

@@ -1713,6 +1883,7 @@ static const struct net_device_ops gve_netdev_ops = {
 	.ndo_set_features	= gve_set_features,
 	.ndo_bpf		= gve_xdp,
 	.ndo_xdp_xmit		= gve_xdp_xmit,
+	.ndo_xsk_wakeup		= gve_xsk_wakeup,
 };

 static void gve_handle_status(struct gve_priv *priv, u32 status)

@@ -1838,6 +2009,7 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
 		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
 		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
 		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
+		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
 	} else {
 		priv->dev->xdp_features = 0;
 	}
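
gve_xsk_pool_disable() quiesces both napi contexts (napi_disable on the rx and tx notification blocks), clears the pool pointers, and issues smp_mb() before re-enabling, so the datapath never dereferences a stale pool. gve_xsk_wakeup() is the kernel-side target of userspace TX kicks: with XDP_USE_NEED_WAKEUP, an application only makes a syscall when the driver asks for one. A hedged userspace fragment of that handshake (libxdp; socket setup as in the first sketch):

/* Sketch: after submitting TX descriptors, only syscall when the
 * driver requested it; the kick lands in gve_xsk_wakeup(), which
 * schedules the TX napi if it is not already running. */
#include <xdp/xsk.h>
#include <sys/socket.h>

static void kick_tx_if_needed(struct xsk_socket *xsk, struct xsk_ring_prod *tx)
{
	if (xsk_ring_prod__needs_wakeup(tx))
		sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
}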

drivers/net/ethernet/google/gve/gve_rx.c

Lines changed: 30 additions & 0 deletions
@@ -10,6 +10,7 @@
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
 #include <net/xdp.h>
+#include <net/xdp_sock_drv.h>

 static void gve_rx_free_buffer(struct device *dev,
 			       struct gve_rx_slot_page_info *page_info,

@@ -593,6 +594,31 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
 	return skb;
 }

+static int gve_xsk_pool_redirect(struct net_device *dev,
+				 struct gve_rx_ring *rx,
+				 void *data, int len,
+				 struct bpf_prog *xdp_prog)
+{
+	struct xdp_buff *xdp;
+	int err;
+
+	if (rx->xsk_pool->frame_len < len)
+		return -E2BIG;
+	xdp = xsk_buff_alloc(rx->xsk_pool);
+	if (!xdp) {
+		u64_stats_update_begin(&rx->statss);
+		rx->xdp_alloc_fails++;
+		u64_stats_update_end(&rx->statss);
+		return -ENOMEM;
+	}
+	xdp->data_end = xdp->data + len;
+	memcpy(xdp->data, data, len);
+	err = xdp_do_redirect(dev, xdp, xdp_prog);
+	if (err)
+		xsk_buff_free(xdp);
+	return err;
+}
+
 static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx,
 			    struct xdp_buff *orig, struct bpf_prog *xdp_prog)
 {

@@ -602,6 +628,10 @@ static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx,
 	void *frame;
 	int err;

+	if (rx->xsk_pool)
+		return gve_xsk_pool_redirect(dev, rx, orig->data,
+					     len, xdp_prog);
+
 	total_len = headroom + SKB_DATA_ALIGN(len) +
 		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	frame = page_frag_alloc(&rx->page_cache, total_len, GFP_ATOMIC);