Skip to content

Commit ae99e18

Browse files
committed
Merge branch 'mvpp2-software-TSO-support'
Antoine Tenart says: ==================== net: mvpp2: software TSO support This series adds the s/w TSO support in the PPv2 driver, in addition to two cosmetic commits. As stated in patch 3/3: Using iperf and 10G ports, using TSO shows a significant performance improvement by a factor 2 to reach around 9.5Gbps in TX; as well as a significant CPU usage drop (from 25% to 15%). ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 5f9ae3d + 186cd4d commit ae99e18

File tree

6 files changed

+164
-27
lines changed

6 files changed

+164
-27
lines changed

drivers/net/ethernet/cavium/thunder/nicvf_queues.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ struct snd_queue {
277277
u16 xdp_free_cnt;
278278
bool is_xdp;
279279

280-
#define TSO_HEADER_SIZE 128
281280
/* For TSO segment's header */
282281
char *tso_hdrs;
283282
dma_addr_t tso_hdrs_phys;

drivers/net/ethernet/freescale/fec_main.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
226226

227227
#define COPYBREAK_DEFAULT 256
228228

229-
#define TSO_HEADER_SIZE 128
230229
/* Max number of allowed TCP segments for software TSO */
231230
#define FEC_MAX_TSO_SEGS 100
232231
#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)

drivers/net/ethernet/marvell/mv643xx_eth.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,6 @@ static char mv643xx_eth_driver_version[] = "1.4";
183183
#define DEFAULT_TX_QUEUE_SIZE 512
184184
#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
185185

186-
#define TSO_HEADER_SIZE 128
187-
188186
/* Max number of allowed TCP segments for software TSO */
189187
#define MV643XX_MAX_TSO_SEGS 100
190188
#define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)

drivers/net/ethernet/marvell/mvneta.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,9 +281,6 @@
281281
*/
282282
#define MVNETA_RSS_LU_TABLE_SIZE 1
283283

284-
/* TSO header size */
285-
#define TSO_HEADER_SIZE 128
286-
287284
/* Max number of Rx descriptors */
288285
#define MVNETA_MAX_RXD 128
289286

drivers/net/ethernet/marvell/mvpp2.c

Lines changed: 162 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <uapi/linux/ppp_defs.h>
3636
#include <net/ip.h>
3737
#include <net/ipv6.h>
38+
#include <net/tso.h>
3839

3940
/* RX Fifo Registers */
4041
#define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port))
@@ -1010,6 +1011,10 @@ struct mvpp2_txq_pcpu {
10101011

10111012
/* Index of the TX DMA descriptor to be cleaned up */
10121013
int txq_get_index;
1014+
1015+
/* DMA buffer for TSO headers */
1016+
char *tso_headers;
1017+
dma_addr_t tso_headers_dma;
10131018
};
10141019

10151020
struct mvpp2_tx_queue {
@@ -5284,15 +5289,14 @@ static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause,
52845289

52855290
/* Allocate and initialize descriptors for aggr TXQ */
52865291
static int mvpp2_aggr_txq_init(struct platform_device *pdev,
5287-
struct mvpp2_tx_queue *aggr_txq,
5288-
int desc_num, int cpu,
5292+
struct mvpp2_tx_queue *aggr_txq, int cpu,
52895293
struct mvpp2 *priv)
52905294
{
52915295
u32 txq_dma;
52925296

52935297
/* Allocate memory for TX descriptors */
52945298
aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
5295-
desc_num * MVPP2_DESC_ALIGNED_SIZE,
5299+
MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
52965300
&aggr_txq->descs_dma, GFP_KERNEL);
52975301
if (!aggr_txq->descs)
52985302
return -ENOMEM;
@@ -5313,7 +5317,8 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
53135317
MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
53145318

53155319
mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma);
5316-
mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num);
5320+
mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu),
5321+
MVPP2_AGGR_TXQ_SIZE);
53175322

53185323
return 0;
53195324
}
@@ -5494,13 +5499,26 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
54945499
txq_pcpu->reserved_num = 0;
54955500
txq_pcpu->txq_put_index = 0;
54965501
txq_pcpu->txq_get_index = 0;
5502+
5503+
txq_pcpu->tso_headers =
5504+
dma_alloc_coherent(port->dev->dev.parent,
5505+
MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
5506+
&txq_pcpu->tso_headers_dma,
5507+
GFP_KERNEL);
5508+
if (!txq_pcpu->tso_headers)
5509+
goto cleanup;
54975510
}
54985511

54995512
return 0;
55005513
cleanup:
55015514
for_each_present_cpu(cpu) {
55025515
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
55035516
kfree(txq_pcpu->buffs);
5517+
5518+
dma_free_coherent(port->dev->dev.parent,
5519+
MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
5520+
txq_pcpu->tso_headers,
5521+
txq_pcpu->tso_headers_dma);
55045522
}
55055523

55065524
dma_free_coherent(port->dev->dev.parent,
@@ -5520,6 +5538,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
55205538
for_each_present_cpu(cpu) {
55215539
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
55225540
kfree(txq_pcpu->buffs);
5541+
5542+
dma_free_coherent(port->dev->dev.parent,
5543+
MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
5544+
txq_pcpu->tso_headers,
5545+
txq_pcpu->tso_headers_dma);
55235546
}
55245547

55255548
if (txq->descs)
@@ -6049,6 +6072,123 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb,
60496072
return -ENOMEM;
60506073
}
60516074

6075+
static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
6076+
struct net_device *dev,
6077+
struct mvpp2_tx_queue *txq,
6078+
struct mvpp2_tx_queue *aggr_txq,
6079+
struct mvpp2_txq_pcpu *txq_pcpu,
6080+
int hdr_sz)
6081+
{
6082+
struct mvpp2_port *port = netdev_priv(dev);
6083+
struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
6084+
dma_addr_t addr;
6085+
6086+
mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
6087+
mvpp2_txdesc_size_set(port, tx_desc, hdr_sz);
6088+
6089+
addr = txq_pcpu->tso_headers_dma +
6090+
txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
6091+
mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
6092+
mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);
6093+
6094+
mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
6095+
MVPP2_TXD_F_DESC |
6096+
MVPP2_TXD_PADDING_DISABLE);
6097+
mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
6098+
}
6099+
6100+
static inline int mvpp2_tso_put_data(struct sk_buff *skb,
6101+
struct net_device *dev, struct tso_t *tso,
6102+
struct mvpp2_tx_queue *txq,
6103+
struct mvpp2_tx_queue *aggr_txq,
6104+
struct mvpp2_txq_pcpu *txq_pcpu,
6105+
int sz, bool left, bool last)
6106+
{
6107+
struct mvpp2_port *port = netdev_priv(dev);
6108+
struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
6109+
dma_addr_t buf_dma_addr;
6110+
6111+
mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
6112+
mvpp2_txdesc_size_set(port, tx_desc, sz);
6113+
6114+
buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz,
6115+
DMA_TO_DEVICE);
6116+
if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
6117+
mvpp2_txq_desc_put(txq);
6118+
return -ENOMEM;
6119+
}
6120+
6121+
mvpp2_txdesc_offset_set(port, tx_desc,
6122+
buf_dma_addr & MVPP2_TX_DESC_ALIGN);
6123+
mvpp2_txdesc_dma_addr_set(port, tx_desc,
6124+
buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
6125+
6126+
if (!left) {
6127+
mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
6128+
if (last) {
6129+
mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
6130+
return 0;
6131+
}
6132+
} else {
6133+
mvpp2_txdesc_cmd_set(port, tx_desc, 0);
6134+
}
6135+
6136+
mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
6137+
return 0;
6138+
}
6139+
6140+
static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev,
6141+
struct mvpp2_tx_queue *txq,
6142+
struct mvpp2_tx_queue *aggr_txq,
6143+
struct mvpp2_txq_pcpu *txq_pcpu)
6144+
{
6145+
struct mvpp2_port *port = netdev_priv(dev);
6146+
struct tso_t tso;
6147+
int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
6148+
int i, len, descs = 0;
6149+
6150+
/* Check number of available descriptors */
6151+
if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
6152+
tso_count_descs(skb)) ||
6153+
mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
6154+
tso_count_descs(skb)))
6155+
return 0;
6156+
6157+
tso_start(skb, &tso);
6158+
len = skb->len - hdr_sz;
6159+
while (len > 0) {
6160+
int left = min_t(int, skb_shinfo(skb)->gso_size, len);
6161+
char *hdr = txq_pcpu->tso_headers +
6162+
txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
6163+
6164+
len -= left;
6165+
descs++;
6166+
6167+
tso_build_hdr(skb, hdr, &tso, left, len == 0);
6168+
mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz);
6169+
6170+
while (left > 0) {
6171+
int sz = min_t(int, tso.size, left);
6172+
left -= sz;
6173+
descs++;
6174+
6175+
if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq,
6176+
txq_pcpu, sz, left, len == 0))
6177+
goto release;
6178+
tso_build_data(skb, &tso, sz);
6179+
}
6180+
}
6181+
6182+
return descs;
6183+
6184+
release:
6185+
for (i = descs - 1; i >= 0; i--) {
6186+
struct mvpp2_tx_desc *tx_desc = txq->descs + i;
6187+
tx_desc_unmap_put(port, txq, tx_desc);
6188+
}
6189+
return 0;
6190+
}
6191+
60526192
/* Main tx processing */
60536193
static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
60546194
{
@@ -6066,6 +6206,10 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
60666206
txq_pcpu = this_cpu_ptr(txq->pcpu);
60676207
aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
60686208

6209+
if (skb_is_gso(skb)) {
6210+
frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
6211+
goto out;
6212+
}
60696213
frags = skb_shinfo(skb)->nr_frags + 1;
60706214

60716215
/* Check number of available descriptors */
@@ -6115,22 +6259,21 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
61156259
}
61166260
}
61176261

6118-
txq_pcpu->reserved_num -= frags;
6119-
txq_pcpu->count += frags;
6120-
aggr_txq->count += frags;
6121-
6122-
/* Enable transmit */
6123-
wmb();
6124-
mvpp2_aggr_txq_pend_desc_add(port, frags);
6125-
6126-
if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) {
6127-
struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
6128-
6129-
netif_tx_stop_queue(nq);
6130-
}
61316262
out:
61326263
if (frags > 0) {
61336264
struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
6265+
struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
6266+
6267+
txq_pcpu->reserved_num -= frags;
6268+
txq_pcpu->count += frags;
6269+
aggr_txq->count += frags;
6270+
6271+
/* Enable transmit */
6272+
wmb();
6273+
mvpp2_aggr_txq_pend_desc_add(port, frags);
6274+
6275+
if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
6276+
netif_tx_stop_queue(nq);
61346277

61356278
u64_stats_update_begin(&stats->syncp);
61366279
stats->tx_packets++;
@@ -7255,7 +7398,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
72557398
}
72567399
}
72577400

7258-
features = NETIF_F_SG | NETIF_F_IP_CSUM;
7401+
features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
72597402
dev->features = features | NETIF_F_RXCSUM;
72607403
dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
72617404
dev->vlan_features |= features;
@@ -7445,8 +7588,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
74457588
for_each_present_cpu(i) {
74467589
priv->aggr_txqs[i].id = i;
74477590
priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE;
7448-
err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i],
7449-
MVPP2_AGGR_TXQ_SIZE, i, priv);
7591+
err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], i, priv);
74507592
if (err < 0)
74517593
return err;
74527594
}

include/net/tso.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
#include <net/ip.h>
55

6+
#define TSO_HEADER_SIZE 128
7+
68
struct tso_t {
79
int next_frag_idx;
810
void *data;

0 commit comments

Comments (0)