
Commit fb8629e

Ioana Ciornei authored and davem330 committed
net: enetc: add support for software TSO
This patch adds support for driver-level TSO in the enetc driver using the TSO API.

Besides using the usual tso_build_hdr() and tso_build_data(), this specific implementation also has to compute the checksum, both IP and L4, for each resulting segment. This is because the ENETC controller does not support Tx checksum offload, which is needed in order to take advantage of TSO.

With the workaround for the ENETC MDIO erratum in place, the Tx path of the driver is forced to lock/unlock for each skb sent. This is why, even though we are computing the checksum by hand, we see the following improvement in TCP termination on the LS1028A SoC, on a single A72 core running at 1.3GHz:

before: 1.63 Gbits/sec
after:  2.34 Gbits/sec

Signed-off-by: Ioana Ciornei <[email protected]>
Reviewed-by: Claudiu Manoil <[email protected]>
Reviewed-by: Vladimir Oltean <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent acede3c commit fb8629e

File tree: 4 files changed, +311 −24 lines

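The checksum math in this patch is incremental: enetc_tso_hdr_csum() returns the checksum of the rewritten L4 header, each mapped payload chunk is folded in with csum_block_add(), and enetc_tso_complete_csum() appends the pseudo-header via csum_tcpudp_magic() or csum_ipv6_magic(). For reference, here is a minimal userspace sketch of the same RFC 1071 ones'-complement flow; the helpers csum_add32() and csum_fold16() and the example buffers are illustrative stand-ins, not kernel API.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Sum a buffer as 16-bit big-endian words into a 32-bit accumulator,
 * the same ones'-complement arithmetic csum_partial() performs.
 */
static uint32_t csum_add32(uint32_t sum, const uint8_t *buf, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] << 8 | buf[i + 1];
	if (len & 1)
		sum += (uint32_t)buf[len - 1] << 8;
	return sum;
}

/* Fold the carries and complement, as csum_fold() does at the end. */
static uint16_t csum_fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t l4_hdr[20];    /* TCP header with its check field zeroed */
	uint8_t payload[1000]; /* one gso_size worth of segment data */
	uint8_t pseudo[12];    /* saddr + daddr + zero/proto + L4 length */
	uint32_t sum = 0;

	memset(l4_hdr, 0, sizeof(l4_hdr));
	memset(payload, 0xab, sizeof(payload));
	memset(pseudo, 0, sizeof(pseudo));

	/* 1) csum over the L4 header, as in enetc_tso_hdr_csum() */
	sum = csum_add32(sum, l4_hdr, sizeof(l4_hdr));
	/* 2) add each payload chunk as it is mapped; with the even-sized
	 * chunks used here this matches what csum_block_add() computes
	 * (the kernel helper additionally handles odd offsets)
	 */
	sum = csum_add32(sum, payload, sizeof(payload));
	/* 3) append the pseudo-header and fold, the job of
	 * csum_tcpudp_magic() in enetc_tso_complete_csum()
	 */
	sum = csum_add32(sum, pseudo, sizeof(pseudo));
	printf("tcp check = 0x%04x\n", csum_fold16(sum));

	return 0;
}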

drivers/net/ethernet/freescale/enetc/enetc.c

Lines changed: 299 additions & 20 deletions
@@ -8,6 +8,7 @@
 #include <linux/vmalloc.h>
 #include <linux/ptp_classify.h>
 #include <net/pkt_sched.h>
+#include <net/tso.h>
 
 static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv)
 {
@@ -314,6 +315,255 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 	return 0;
 }
 
+static void enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+				 struct enetc_tx_swbd *tx_swbd,
+				 union enetc_tx_bd *txbd, int *i, int hdr_len,
+				 int data_len)
+{
+	union enetc_tx_bd txbd_tmp;
+	u8 flags = 0, e_flags = 0;
+	dma_addr_t addr;
+
+	enetc_clear_tx_bd(&txbd_tmp);
+	addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE;
+
+	if (skb_vlan_tag_present(skb))
+		flags |= ENETC_TXBD_FLAGS_EX;
+
+	txbd_tmp.addr = cpu_to_le64(addr);
+	txbd_tmp.buf_len = cpu_to_le16(hdr_len);
+
+	/* first BD needs frm_len and offload flags set */
+	txbd_tmp.frm_len = cpu_to_le16(hdr_len + data_len);
+	txbd_tmp.flags = flags;
+
+	/* For the TSO header we do not set the dma address since we do not
+	 * want it unmapped when we do cleanup. We still set len so that we
+	 * count the bytes sent.
+	 */
+	tx_swbd->len = hdr_len;
+	tx_swbd->do_twostep_tstamp = false;
+	tx_swbd->check_wb = false;
+
+	/* Actually write the header in the BD */
+	*txbd = txbd_tmp;
+
+	/* Add extension BD for VLAN */
+	if (flags & ENETC_TXBD_FLAGS_EX) {
+		/* Get the next BD */
+		enetc_bdr_idx_inc(tx_ring, i);
+		txbd = ENETC_TXBD(*tx_ring, *i);
+		tx_swbd = &tx_ring->tx_swbd[*i];
+		prefetchw(txbd);
+
+		/* Setup the VLAN fields */
+		enetc_clear_tx_bd(&txbd_tmp);
+		txbd_tmp.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb));
+		txbd_tmp.ext.tpid = 0; /* < C-TAG */
+		e_flags |= ENETC_TXBD_E_FLAGS_VLAN_INS;
+
+		/* Write the BD */
+		txbd_tmp.ext.e_flags = e_flags;
+		*txbd = txbd_tmp;
+	}
+}
+
+static int enetc_map_tx_tso_data(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+				 struct enetc_tx_swbd *tx_swbd,
+				 union enetc_tx_bd *txbd, char *data,
+				 int size, bool last_bd)
+{
+	union enetc_tx_bd txbd_tmp;
+	dma_addr_t addr;
+	u8 flags = 0;
+
+	enetc_clear_tx_bd(&txbd_tmp);
+
+	addr = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(tx_ring->dev, addr))) {
+		netdev_err(tx_ring->ndev, "DMA map error\n");
+		return -ENOMEM;
+	}
+
+	if (last_bd) {
+		flags |= ENETC_TXBD_FLAGS_F;
+		tx_swbd->is_eof = 1;
+	}
+
+	txbd_tmp.addr = cpu_to_le64(addr);
+	txbd_tmp.buf_len = cpu_to_le16(size);
+	txbd_tmp.flags = flags;
+
+	tx_swbd->dma = addr;
+	tx_swbd->len = size;
+	tx_swbd->dir = DMA_TO_DEVICE;
+
+	*txbd = txbd_tmp;
+
+	return 0;
+}
+
+static __wsum enetc_tso_hdr_csum(struct tso_t *tso, struct sk_buff *skb,
+				 char *hdr, int hdr_len, int *l4_hdr_len)
+{
+	char *l4_hdr = hdr + skb_transport_offset(skb);
+	int mac_hdr_len = skb_network_offset(skb);
+
+	if (tso->tlen != sizeof(struct udphdr)) {
+		struct tcphdr *tcph = (struct tcphdr *)(l4_hdr);
+
+		tcph->check = 0;
+	} else {
+		struct udphdr *udph = (struct udphdr *)(l4_hdr);
+
+		udph->check = 0;
+	}
+
+	/* Compute the IP checksum. This is necessary since tso_build_hdr()
+	 * already incremented the IP ID field.
+	 */
+	if (!tso->ipv6) {
+		struct iphdr *iph = (void *)(hdr + mac_hdr_len);
+
+		iph->check = 0;
+		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+	}
+
+	/* Compute the checksum over the L4 header. */
+	*l4_hdr_len = hdr_len - skb_transport_offset(skb);
+	return csum_partial(l4_hdr, *l4_hdr_len, 0);
+}
+
+static void enetc_tso_complete_csum(struct enetc_bdr *tx_ring, struct tso_t *tso,
+				    struct sk_buff *skb, char *hdr, int len,
+				    __wsum sum)
+{
+	char *l4_hdr = hdr + skb_transport_offset(skb);
+	__sum16 csum_final;
+
+	/* Complete the L4 checksum by appending the pseudo-header to the
+	 * already computed checksum.
+	 */
+	if (!tso->ipv6)
+		csum_final = csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					       ip_hdr(skb)->daddr,
+					       len, ip_hdr(skb)->protocol, sum);
+	else
+		csum_final = csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					     &ipv6_hdr(skb)->daddr,
+					     len, ipv6_hdr(skb)->nexthdr, sum);
+
+	if (tso->tlen != sizeof(struct udphdr)) {
+		struct tcphdr *tcph = (struct tcphdr *)(l4_hdr);
+
+		tcph->check = csum_final;
+	} else {
+		struct udphdr *udph = (struct udphdr *)(l4_hdr);
+
+		udph->check = csum_final;
+	}
+}
+
+static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
+{
+	int hdr_len, total_len, data_len;
+	struct enetc_tx_swbd *tx_swbd;
+	union enetc_tx_bd *txbd;
+	struct tso_t tso;
+	__wsum csum, csum2;
+	int count = 0, pos;
+	int err, i, bd_data_num;
+
+	/* Initialize the TSO handler, and prepare the first payload */
+	hdr_len = tso_start(skb, &tso);
+	total_len = skb->len - hdr_len;
+	i = tx_ring->next_to_use;
+
+	while (total_len > 0) {
+		char *hdr;
+
+		/* Get the BD */
+		txbd = ENETC_TXBD(*tx_ring, i);
+		tx_swbd = &tx_ring->tx_swbd[i];
+		prefetchw(txbd);
+
+		/* Determine the length of this packet */
+		data_len = min_t(int, skb_shinfo(skb)->gso_size, total_len);
+		total_len -= data_len;
+
+		/* prepare packet headers: MAC + IP + TCP */
+		hdr = tx_ring->tso_headers + i * TSO_HEADER_SIZE;
+		tso_build_hdr(skb, hdr, &tso, data_len, total_len == 0);
+
+		/* compute the csum over the L4 header */
+		csum = enetc_tso_hdr_csum(&tso, skb, hdr, hdr_len, &pos);
+		enetc_map_tx_tso_hdr(tx_ring, skb, tx_swbd, txbd, &i, hdr_len, data_len);
+		bd_data_num = 0;
+		count++;
+
+		while (data_len > 0) {
+			int size;
+
+			size = min_t(int, tso.size, data_len);
+
+			/* Advance the index in the BDR */
+			enetc_bdr_idx_inc(tx_ring, &i);
+			txbd = ENETC_TXBD(*tx_ring, i);
+			tx_swbd = &tx_ring->tx_swbd[i];
+			prefetchw(txbd);
+
+			/* Compute the checksum over this segment of data and
+			 * add it to the csum already computed (over the L4
+			 * header and possible other data segments).
+			 */
+			csum2 = csum_partial(tso.data, size, 0);
+			csum = csum_block_add(csum, csum2, pos);
+			pos += size;
+
+			err = enetc_map_tx_tso_data(tx_ring, skb, tx_swbd, txbd,
+						    tso.data, size,
+						    size == data_len);
+			if (err)
+				goto err_map_data;
+
+			data_len -= size;
+			count++;
+			bd_data_num++;
+			tso_build_data(skb, &tso, size);
+
+			if (unlikely(bd_data_num >= ENETC_MAX_SKB_FRAGS && data_len))
+				goto err_chained_bd;
+		}
+
+		enetc_tso_complete_csum(tx_ring, &tso, skb, hdr, pos, csum);
+
+		if (total_len == 0)
+			tx_swbd->skb = skb;
+
+		/* Go to the next BD */
+		enetc_bdr_idx_inc(tx_ring, &i);
+	}
+
+	tx_ring->next_to_use = i;
+	enetc_update_tx_ring_tail(tx_ring);
+
+	return count;
+
+err_map_data:
+	dev_err(tx_ring->dev, "DMA map error");
+
+err_chained_bd:
+	do {
+		tx_swbd = &tx_ring->tx_swbd[i];
+		enetc_free_tx_frame(tx_ring, tx_swbd);
+		if (i == 0)
+			i = tx_ring->bd_count;
+		i--;
+	} while (count--);
+
+	return 0;
+}
+
 static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
 				    struct net_device *ndev)
 {
@@ -332,26 +582,36 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
 
 	tx_ring = priv->tx_ring[skb->queue_mapping];
 
-	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
-		if (unlikely(skb_linearize(skb)))
-			goto drop_packet_err;
+	if (skb_is_gso(skb)) {
+		if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) {
+			netif_stop_subqueue(ndev, tx_ring->index);
+			return NETDEV_TX_BUSY;
+		}
 
-	count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */
-	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) {
-		netif_stop_subqueue(ndev, tx_ring->index);
-		return NETDEV_TX_BUSY;
-	}
+		enetc_lock_mdio();
+		count = enetc_map_tx_tso_buffs(tx_ring, skb);
+		enetc_unlock_mdio();
+	} else {
+		if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
+			if (unlikely(skb_linearize(skb)))
+				goto drop_packet_err;
+
+		count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */
+		if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) {
+			netif_stop_subqueue(ndev, tx_ring->index);
+			return NETDEV_TX_BUSY;
+		}
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		err = skb_checksum_help(skb);
-		if (err)
-			goto drop_packet_err;
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			err = skb_checksum_help(skb);
+			if (err)
+				goto drop_packet_err;
+		}
+		enetc_lock_mdio();
+		count = enetc_map_tx_buffs(tx_ring, skb);
+		enetc_unlock_mdio();
 	}
 
-	enetc_lock_mdio();
-	count = enetc_map_tx_buffs(tx_ring, skb);
-	enetc_unlock_mdio();
-
 	if (unlikely(!count))
 		goto drop_packet_err;
 
@@ -1499,15 +1759,30 @@ static int enetc_alloc_txbdr(struct enetc_bdr *txr)
 		return -ENOMEM;
 
 	err = enetc_dma_alloc_bdr(txr, sizeof(union enetc_tx_bd));
-	if (err) {
-		vfree(txr->tx_swbd);
-		return err;
-	}
+	if (err)
+		goto err_alloc_bdr;
+
+	txr->tso_headers = dma_alloc_coherent(txr->dev,
+					      txr->bd_count * TSO_HEADER_SIZE,
+					      &txr->tso_headers_dma,
+					      GFP_KERNEL);
+	if (err)
+		goto err_alloc_tso;
 
 	txr->next_to_clean = 0;
 	txr->next_to_use = 0;
 
 	return 0;
+
+err_alloc_tso:
+	dma_free_coherent(txr->dev, txr->bd_count * sizeof(union enetc_tx_bd),
+			  txr->bd_base, txr->bd_dma_base);
+	txr->bd_base = NULL;
+err_alloc_bdr:
+	vfree(txr->tx_swbd);
+	txr->tx_swbd = NULL;
+
+	return err;
 }
 
 static void enetc_free_txbdr(struct enetc_bdr *txr)
@@ -1519,6 +1794,10 @@ static void enetc_free_txbdr(struct enetc_bdr *txr)
 
 	size = txr->bd_count * sizeof(union enetc_tx_bd);
 
+	dma_free_coherent(txr->dev, txr->bd_count * TSO_HEADER_SIZE,
+			  txr->tso_headers, txr->tso_headers_dma);
+	txr->tso_headers = NULL;
+
 	dma_free_coherent(txr->dev, size, txr->bd_base, txr->bd_dma_base);
 	txr->bd_base = NULL;
 
drivers/net/ethernet/freescale/enetc/enetc.h

Lines changed: 4 additions & 0 deletions
@@ -112,6 +112,10 @@ struct enetc_bdr {
 	dma_addr_t bd_dma_base;
 	u8 tsd_enable; /* Time specific departure */
 	bool ext_en; /* enable h/w descriptor extensions */
+
+	/* DMA buffer for TSO headers */
+	char *tso_headers;
+	dma_addr_t tso_headers_dma;
 } ____cacheline_aligned_in_smp;
 
 static inline void enetc_bdr_idx_inc(struct enetc_bdr *bdr, int *i)
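The two new fields pair the CPU pointer of the coherent header buffer with its DMA handle; every BD index owns one TSO_HEADER_SIZE slot, so the CPU and device views of a slot both follow from the same ring index (see enetc_map_tx_tso_buffs() and enetc_map_tx_tso_hdr() above). A small userspace model of that slot addressing; the ring size, base address, and starting index are assumed example values, not driver state.

#include <stdint.h>
#include <stdio.h>

#define TSO_HEADER_SIZE 256 /* per-slot size, as in include/net/tso.h */
#define BD_COUNT 8          /* example ring size */

int main(void)
{
	static uint8_t tso_headers[BD_COUNT * TSO_HEADER_SIZE];
	uintptr_t tso_headers_dma = 0x80000000; /* pretend DMA base */
	int i = 6; /* example next_to_use */
	int n;

	/* walk a few BDs, wrapping like enetc_bdr_idx_inc() does */
	for (n = 0; n < 4; n++) {
		uint8_t *hdr = tso_headers + i * TSO_HEADER_SIZE;       /* CPU view */
		uintptr_t addr = tso_headers_dma + i * TSO_HEADER_SIZE; /* device view */

		printf("BD %d: hdr slot %p, dma 0x%lx\n", i, (void *)hdr,
		       (unsigned long)addr);
		i = (i + 1) % BD_COUNT; /* ring wrap-around */
	}
	return 0;
}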

drivers/net/ethernet/freescale/enetc/enetc_pf.c

Lines changed: 4 additions & 2 deletions
@@ -760,11 +760,13 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
 	ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
 			    NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 			    NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK |
-			    NETIF_F_HW_CSUM;
+			    NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
 	ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM |
 			 NETIF_F_HW_VLAN_CTAG_TX |
 			 NETIF_F_HW_VLAN_CTAG_RX |
-			 NETIF_F_HW_CSUM;
+			 NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
+	ndev->vlan_features = NETIF_F_SG | NETIF_F_HW_CSUM |
+			      NETIF_F_TSO | NETIF_F_TSO6;
 
 	if (si->num_rss)
 		ndev->hw_features |= NETIF_F_RXHASH;
