Commit c0f031b

Jakub Kicinski authored and davem330 committed

nfp: use alloc_frag() and build_skb()

Speed up RX processing by moving to the alloc_frag()/build_skb()
paradigm. Since we're no longer mapping the entire buffer for DMA,
add helpers which take care of calculating offsets and lengths.

Signed-off-by: Jakub Kicinski <[email protected]>
Signed-off-by: David S. Miller <[email protected]>

1 parent e9949ae commit c0f031b
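
For context, here is a minimal sketch of the alloc_frag()/build_skb() RX
pattern this commit adopts. The helper names rx_refill_one() and rx_build()
are hypothetical, for illustration only; netdev_alloc_frag(), build_skb(),
skb_free_frag() and the DMA mapping calls are the real kernel APIs used in
the diff below.

#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Allocate one page fragment and map only its packet-data region:
 * skip the headroom at the front and leave the skb_shared_info tail
 * unmapped, in the spirit of the new nfp_net_dma_map_rx() helper.
 */
static void *rx_refill_one(struct device *dev, unsigned int bufsz,
                           dma_addr_t *dma_addr)
{
        unsigned int headroom = NET_SKB_PAD + NET_IP_ALIGN;
        unsigned int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        void *frag;

        frag = netdev_alloc_frag(bufsz);        /* one frag, no sk_buff yet */
        if (!frag)
                return NULL;

        *dma_addr = dma_map_single(dev, frag + headroom,
                                   bufsz - headroom - tailroom,
                                   DMA_FROM_DEVICE);
        if (dma_mapping_error(dev, *dma_addr)) {
                skb_free_frag(frag);
                return NULL;
        }
        return frag;
}

/* On completion, build the sk_buff around the fragment in place (no copy)
 * and reclaim the headroom with skb_reserve().
 */
static struct sk_buff *rx_build(void *frag, unsigned int bufsz,
                                unsigned int data_len)
{
        struct sk_buff *skb;

        skb = build_skb(frag, bufsz);   /* bufsz includes the shared_info tail */
        if (unlikely(!skb))
                return NULL;
        skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
        skb_put(skb, data_len);
        return skb;
}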

File tree

3 files changed: +82, -50 lines changed

drivers/net/ethernet/netronome/nfp/nfp_net.h

Lines changed: 6 additions & 2 deletions
@@ -101,6 +101,10 @@
 /* Offload definitions */
 #define NFP_NET_N_VXLAN_PORTS  (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16))
 
+#define NFP_NET_RX_BUF_HEADROOM        (NET_SKB_PAD + NET_IP_ALIGN)
+#define NFP_NET_RX_BUF_NON_DATA        (NFP_NET_RX_BUF_HEADROOM +             \
+                                        SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 /* Forward declarations */
 struct nfp_net;
 struct nfp_net_r_vector;
@@ -277,11 +281,11 @@ struct nfp_net_rx_hash {
 
 /**
  * struct nfp_net_rx_buf - software RX buffer descriptor
- * @skb:       sk_buff associated with this buffer
+ * @frag:      page fragment buffer
  * @dma_addr:  DMA mapping address of the buffer
  */
 struct nfp_net_rx_buf {
-       struct sk_buff *skb;
+       void *frag;
        dma_addr_t dma_addr;
 };
 
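
The two new macros imply the following buffer layout (a sketch inferred from
the macros above and the DMA helpers added in nfp_net_common.c below):

frag                                                        frag + fl_bufsz
|<- NFP_NET_RX_BUF_HEADROOM ->|<---- packet data ---->|<- skb_shared_info ->|
          (unmapped)                 (DMA-mapped)             (unmapped)

Only the middle region is mapped for the device; the headroom is reclaimed
with skb_reserve() after build_skb(), and the tail is reserved for the
struct skb_shared_info that build_skb() places at the end of the fragment,
which is what NFP_NET_RX_BUF_NON_DATA accounts for.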

drivers/net/ethernet/netronome/nfp/nfp_net_common.c

Lines changed: 72 additions & 43 deletions
@@ -50,6 +50,7 @@
 #include <linux/interrupt.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/page_ref.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
 #include <linux/msi.h>
@@ -80,6 +81,22 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
        put_unaligned_le32(reg, fw_ver);
 }
 
+static dma_addr_t
+nfp_net_dma_map_rx(struct nfp_net *nn, void *frag, unsigned int bufsz,
+                  int direction)
+{
+       return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM,
+                             bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+}
+
+static void
+nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr,
+                    unsigned int bufsz, int direction)
+{
+       dma_unmap_single(&nn->pdev->dev, dma_addr,
+                        bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+}
+
 /* Firmware reconfig
  *
  * Firmware reconfig may take a while so we have two versions of it -
@@ -1035,64 +1052,67 @@ nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu)
 {
        unsigned int fl_bufsz;
 
+       fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
        if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
-               fl_bufsz = NFP_NET_MAX_PREPEND;
+               fl_bufsz += NFP_NET_MAX_PREPEND;
        else
-               fl_bufsz = nn->rx_offset;
+               fl_bufsz += nn->rx_offset;
        fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu;
 
+       fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
+       fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
        return fl_bufsz;
 }
 
 /**
- * nfp_net_rx_alloc_one() - Allocate and map skb for RX
+ * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
  * @rx_ring:   RX ring structure of the skb
  * @dma_addr:  Pointer to storage for DMA address (output param)
  * @fl_bufsz:  size of freelist buffers
  *
- * This function will allcate a new skb, map it for DMA.
+ * This function will allcate a new page frag, map it for DMA.
  *
- * Return: allocated skb or NULL on failure.
+ * Return: allocated page frag or NULL on failure.
  */
-static struct sk_buff *
+static void *
 nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
                     unsigned int fl_bufsz)
 {
        struct nfp_net *nn = rx_ring->r_vec->nfp_net;
-       struct sk_buff *skb;
+       void *frag;
 
-       skb = netdev_alloc_skb(nn->netdev, fl_bufsz);
-       if (!skb) {
-               nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n");
+       frag = netdev_alloc_frag(fl_bufsz);
+       if (!frag) {
+               nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
                return NULL;
        }
 
-       *dma_addr = dma_map_single(&nn->pdev->dev, skb->data,
-                                  fl_bufsz, DMA_FROM_DEVICE);
+       *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, DMA_FROM_DEVICE);
        if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
-               dev_kfree_skb_any(skb);
+               skb_free_frag(frag);
                nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
                return NULL;
        }
 
-       return skb;
+       return frag;
 }
 
 /**
  * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
  * @rx_ring:   RX ring structure
- * @skb:       Skb to put on rings
+ * @frag:      page fragment buffer
  * @dma_addr:  DMA address of skb mapping
  */
 static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
-                               struct sk_buff *skb, dma_addr_t dma_addr)
+                               void *frag, dma_addr_t dma_addr)
 {
        unsigned int wr_idx;
 
        wr_idx = rx_ring->wr_p % rx_ring->cnt;
 
        /* Stash SKB and DMA address away */
-       rx_ring->rxbufs[wr_idx].skb = skb;
+       rx_ring->rxbufs[wr_idx].frag = frag;
        rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
 
        /* Fill freelist descriptor */
@@ -1127,9 +1147,9 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
        wr_idx = rx_ring->wr_p % rx_ring->cnt;
        last_idx = rx_ring->cnt - 1;
        rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
-       rx_ring->rxbufs[wr_idx].skb = rx_ring->rxbufs[last_idx].skb;
+       rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
        rx_ring->rxbufs[last_idx].dma_addr = 0;
-       rx_ring->rxbufs[last_idx].skb = NULL;
+       rx_ring->rxbufs[last_idx].frag = NULL;
 
        memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
        rx_ring->wr_p = 0;
@@ -1149,22 +1169,21 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
 static void
 nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 {
-       struct pci_dev *pdev = nn->pdev;
        unsigned int i;
 
        for (i = 0; i < rx_ring->cnt - 1; i++) {
                /* NULL skb can only happen when initial filling of the ring
                 * fails to allocate enough buffers and calls here to free
                 * already allocated ones.
                 */
-               if (!rx_ring->rxbufs[i].skb)
+               if (!rx_ring->rxbufs[i].frag)
                        continue;
 
-               dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr,
-                                rx_ring->bufsz, DMA_FROM_DEVICE);
-               dev_kfree_skb_any(rx_ring->rxbufs[i].skb);
+               nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr,
+                                    rx_ring->bufsz, DMA_FROM_DEVICE);
+               skb_free_frag(rx_ring->rxbufs[i].frag);
                rx_ring->rxbufs[i].dma_addr = 0;
-               rx_ring->rxbufs[i].skb = NULL;
+               rx_ring->rxbufs[i].frag = NULL;
        }
 }
 
@@ -1182,10 +1201,10 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
        rxbufs = rx_ring->rxbufs;
 
        for (i = 0; i < rx_ring->cnt - 1; i++) {
-               rxbufs[i].skb =
+               rxbufs[i].frag =
                        nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
                                             rx_ring->bufsz);
-               if (!rxbufs[i].skb) {
+               if (!rxbufs[i].frag) {
                        nfp_net_rx_ring_bufs_free(nn, rx_ring);
                        return -ENOMEM;
                }
@@ -1203,7 +1222,7 @@ static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
        unsigned int i;
 
        for (i = 0; i < rx_ring->cnt - 1; i++)
-               nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].skb,
+               nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag,
                                    rx_ring->rxbufs[i].dma_addr);
 }
 
@@ -1338,8 +1357,13 @@ nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring,
        r_vec->rx_drops++;
        u64_stats_update_end(&r_vec->rx_sync);
 
+       /* skb is build based on the frag, free_skb() would free the frag
+        * so to be able to reuse it we need an extra ref.
+        */
+       if (skb && rxbuf && skb->head == rxbuf->frag)
+               page_ref_inc(virt_to_head_page(rxbuf->frag));
        if (rxbuf)
-               nfp_net_rx_give_one(rx_ring, rxbuf->skb, rxbuf->dma_addr);
+               nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr);
        if (skb)
                dev_kfree_skb_any(skb);
 }
@@ -1360,10 +1384,12 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
        struct nfp_net *nn = r_vec->nfp_net;
        unsigned int data_len, meta_len;
-       struct sk_buff *skb, *new_skb;
+       struct nfp_net_rx_buf *rxbuf;
        struct nfp_net_rx_desc *rxd;
        dma_addr_t new_dma_addr;
+       struct sk_buff *skb;
        int pkts_polled = 0;
+       void *new_frag;
        int idx;
 
        while (pkts_polled < budget) {
@@ -1381,21 +1407,23 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
                rx_ring->rd_p++;
                pkts_polled++;
 
-               skb = rx_ring->rxbufs[idx].skb;
-
-               new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr,
-                                              nn->fl_bufsz);
-               if (!new_skb) {
-                       nfp_net_rx_drop(r_vec, rx_ring, &rx_ring->rxbufs[idx],
-                                       NULL);
+               rxbuf = &rx_ring->rxbufs[idx];
+               skb = build_skb(rxbuf->frag, nn->fl_bufsz);
+               if (unlikely(!skb)) {
+                       nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL);
+                       continue;
+               }
+               new_frag = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr,
+                                               nn->fl_bufsz);
+               if (unlikely(!new_frag)) {
+                       nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb);
                        continue;
                }
 
-               dma_unmap_single(&nn->pdev->dev,
-                                rx_ring->rxbufs[idx].dma_addr,
-                                nn->fl_bufsz, DMA_FROM_DEVICE);
+               nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[idx].dma_addr,
+                                    nn->fl_bufsz, DMA_FROM_DEVICE);
 
-               nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr);
+               nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
 
                /* < meta_len >
                 * <-- [rx_offset] -->
@@ -1413,9 +1441,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
                data_len = le16_to_cpu(rxd->rxd.data_len);
 
                if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
-                       skb_reserve(skb, meta_len);
+                       skb_reserve(skb, NFP_NET_RX_BUF_HEADROOM + meta_len);
                else
-                       skb_reserve(skb, nn->rx_offset);
+                       skb_reserve(skb,
+                                   NFP_NET_RX_BUF_HEADROOM + nn->rx_offset);
                skb_put(skb, data_len - meta_len);
 
                /* Stats update */
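
Putting the nfp_net_calc_fl_bufsz() changes together, the freelist buffer
size now works out to (a restatement of the hunk above, not new code;
"prepend" stands for NFP_NET_MAX_PREPEND or nn->rx_offset):

fl_bufsz = SKB_DATA_ALIGN(NFP_NET_RX_BUF_HEADROOM + prepend +
                          ETH_HLEN + VLAN_HLEN * 2 + mtu) +
           SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

This matches the frag_size contract of build_skb(), which expects the
struct skb_shared_info at the end of the fragment; that is why nfp_net_rx()
can pass nn->fl_bufsz straight to build_skb(), while the DMA helpers map
only fl_bufsz - NFP_NET_RX_BUF_NON_DATA bytes.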

drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c

Lines changed: 4 additions & 5 deletions
@@ -44,8 +44,8 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
        struct nfp_net_r_vector *r_vec = file->private;
        struct nfp_net_rx_ring *rx_ring;
        struct nfp_net_rx_desc *rxd;
-       struct sk_buff *skb;
        struct nfp_net *nn;
+       void *frag;
        int i;
 
        rtnl_lock();
@@ -73,10 +73,9 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
                seq_printf(file, "%04d: 0x%08x 0x%08x", i,
                           rxd->vals[0], rxd->vals[1]);
 
-               skb = READ_ONCE(rx_ring->rxbufs[i].skb);
-               if (skb)
-                       seq_printf(file, " skb->head=%p skb->data=%p",
-                                  skb->head, skb->data);
+               frag = READ_ONCE(rx_ring->rxbufs[i].frag);
+               if (frag)
+                       seq_printf(file, " frag=%p", frag);
 
                if (rx_ring->rxbufs[i].dma_addr)
                        seq_printf(file, " dma_addr=%pad",