Skip to content

Commit b30d27f

Browse files
committed
Merge branch 'mediatek-stress-test-fixes'
John Crispin says: ==================== net: mediatek: make the driver pass stress tests While testing the driver we managed to get the TX path to stall and fail to recover. When dual MAC support was added to the driver, the whole queue stop/wake code was not properly adapted. There was also a regression in the locking of the xmit function. The fact that watchdog_timeo was not set and that the tx_timeout code failed to properly reset the dma, irq and queue just made the mess complete. This series make the driver pass stress testing. With this series applied the testbed has been running for several days and still has not locked up. We have a second setup that has a small hack patch applied to randomly stop irqs and/or one of the queues and successfully manages to recover from these simulated tx stalls. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents da0caad + 369f045 commit b30d27f

File tree

2 files changed

+66
-44
lines changed

2 files changed

+66
-44
lines changed

drivers/net/ethernet/mediatek/mtk_eth_soc.c

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
536536
struct mtk_eth *eth = mac->hw;
537537
struct mtk_tx_dma *itxd, *txd;
538538
struct mtk_tx_buf *tx_buf;
539-
unsigned long flags;
540539
dma_addr_t mapped_addr;
541540
unsigned int nr_frags;
542541
int i, n_desc = 1;
@@ -568,11 +567,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
568567
if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
569568
return -ENOMEM;
570569

571-
/* normally we can rely on the stack not calling this more than once,
572-
* however we have 2 queues running ont he same ring so we need to lock
573-
* the ring access
574-
*/
575-
spin_lock_irqsave(&eth->page_lock, flags);
576570
WRITE_ONCE(itxd->txd1, mapped_addr);
577571
tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
578572
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
@@ -609,8 +603,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
609603
WRITE_ONCE(txd->txd1, mapped_addr);
610604
WRITE_ONCE(txd->txd3, (TX_DMA_SWC |
611605
TX_DMA_PLEN0(frag_map_size) |
612-
last_frag * TX_DMA_LS0) |
613-
mac->id);
606+
last_frag * TX_DMA_LS0));
614607
WRITE_ONCE(txd->txd4, 0);
615608

616609
tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
@@ -632,8 +625,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
632625
WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
633626
(!nr_frags * TX_DMA_LS0)));
634627

635-
spin_unlock_irqrestore(&eth->page_lock, flags);
636-
637628
netdev_sent_queue(dev, skb->len);
638629
skb_tx_timestamp(skb);
639630

@@ -661,8 +652,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
661652
itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
662653
} while (itxd != txd);
663654

664-
spin_unlock_irqrestore(&eth->page_lock, flags);
665-
666655
return -ENOMEM;
667656
}
668657

@@ -681,7 +670,29 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
681670
nfrags += skb_shinfo(skb)->nr_frags;
682671
}
683672

684-
return DIV_ROUND_UP(nfrags, 2);
673+
return nfrags;
674+
}
675+
676+
static void mtk_wake_queue(struct mtk_eth *eth)
677+
{
678+
int i;
679+
680+
for (i = 0; i < MTK_MAC_COUNT; i++) {
681+
if (!eth->netdev[i])
682+
continue;
683+
netif_wake_queue(eth->netdev[i]);
684+
}
685+
}
686+
687+
static void mtk_stop_queue(struct mtk_eth *eth)
688+
{
689+
int i;
690+
691+
for (i = 0; i < MTK_MAC_COUNT; i++) {
692+
if (!eth->netdev[i])
693+
continue;
694+
netif_stop_queue(eth->netdev[i]);
695+
}
685696
}
686697

687698
static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -690,14 +701,22 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
690701
struct mtk_eth *eth = mac->hw;
691702
struct mtk_tx_ring *ring = &eth->tx_ring;
692703
struct net_device_stats *stats = &dev->stats;
704+
unsigned long flags;
693705
bool gso = false;
694706
int tx_num;
695707

708+
/* normally we can rely on the stack not calling this more than once,
709+
* however we have 2 queues running on the same ring so we need to lock
710+
* the ring access
711+
*/
712+
spin_lock_irqsave(&eth->page_lock, flags);
713+
696714
tx_num = mtk_cal_txd_req(skb);
697715
if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
698-
netif_stop_queue(dev);
716+
mtk_stop_queue(eth);
699717
netif_err(eth, tx_queued, dev,
700718
"Tx Ring full when queue awake!\n");
719+
spin_unlock_irqrestore(&eth->page_lock, flags);
701720
return NETDEV_TX_BUSY;
702721
}
703722

@@ -720,15 +739,17 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
720739
goto drop;
721740

722741
if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
723-
netif_stop_queue(dev);
742+
mtk_stop_queue(eth);
724743
if (unlikely(atomic_read(&ring->free_count) >
725744
ring->thresh))
726-
netif_wake_queue(dev);
745+
mtk_wake_queue(eth);
727746
}
747+
spin_unlock_irqrestore(&eth->page_lock, flags);
728748

729749
return NETDEV_TX_OK;
730750

731751
drop:
752+
spin_unlock_irqrestore(&eth->page_lock, flags);
732753
stats->tx_dropped++;
733754
dev_kfree_skb(skb);
734755
return NETDEV_TX_OK;
@@ -897,13 +918,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
897918
if (!total)
898919
return 0;
899920

900-
for (i = 0; i < MTK_MAC_COUNT; i++) {
901-
if (!eth->netdev[i] ||
902-
unlikely(!netif_queue_stopped(eth->netdev[i])))
903-
continue;
904-
if (atomic_read(&ring->free_count) > ring->thresh)
905-
netif_wake_queue(eth->netdev[i]);
906-
}
921+
if (atomic_read(&ring->free_count) > ring->thresh)
922+
mtk_wake_queue(eth);
907923

908924
return total;
909925
}
@@ -1176,7 +1192,7 @@ static void mtk_tx_timeout(struct net_device *dev)
11761192
eth->netdev[mac->id]->stats.tx_errors++;
11771193
netif_err(eth, tx_err, dev,
11781194
"transmit timed out\n");
1179-
schedule_work(&mac->pending_work);
1195+
schedule_work(&eth->pending_work);
11801196
}
11811197

11821198
static irqreturn_t mtk_handle_irq(int irq, void *_eth)
@@ -1413,19 +1429,30 @@ static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
14131429

14141430
static void mtk_pending_work(struct work_struct *work)
14151431
{
1416-
struct mtk_mac *mac = container_of(work, struct mtk_mac, pending_work);
1417-
struct mtk_eth *eth = mac->hw;
1418-
struct net_device *dev = eth->netdev[mac->id];
1419-
int err;
1432+
struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
1433+
int err, i;
1434+
unsigned long restart = 0;
14201435

14211436
rtnl_lock();
1422-
mtk_stop(dev);
14231437

1424-
err = mtk_open(dev);
1425-
if (err) {
1426-
netif_alert(eth, ifup, dev,
1427-
"Driver up/down cycle failed, closing device.\n");
1428-
dev_close(dev);
1438+
/* stop all devices to make sure that dma is properly shut down */
1439+
for (i = 0; i < MTK_MAC_COUNT; i++) {
1440+
if (!eth->netdev[i])
1441+
continue;
1442+
mtk_stop(eth->netdev[i]);
1443+
__set_bit(i, &restart);
1444+
}
1445+
1446+
/* restart DMA and enable IRQs */
1447+
for (i = 0; i < MTK_MAC_COUNT; i++) {
1448+
if (!test_bit(i, &restart))
1449+
continue;
1450+
err = mtk_open(eth->netdev[i]);
1451+
if (err) {
1452+
netif_alert(eth, ifup, eth->netdev[i],
1453+
"Driver up/down cycle failed, closing device.\n");
1454+
dev_close(eth->netdev[i]);
1455+
}
14291456
}
14301457
rtnl_unlock();
14311458
}
@@ -1435,15 +1462,13 @@ static int mtk_cleanup(struct mtk_eth *eth)
14351462
int i;
14361463

14371464
for (i = 0; i < MTK_MAC_COUNT; i++) {
1438-
struct mtk_mac *mac = netdev_priv(eth->netdev[i]);
1439-
14401465
if (!eth->netdev[i])
14411466
continue;
14421467

14431468
unregister_netdev(eth->netdev[i]);
14441469
free_netdev(eth->netdev[i]);
1445-
cancel_work_sync(&mac->pending_work);
14461470
}
1471+
cancel_work_sync(&eth->pending_work);
14471472

14481473
return 0;
14491474
}
@@ -1631,7 +1656,6 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
16311656
mac->id = id;
16321657
mac->hw = eth;
16331658
mac->of_node = np;
1634-
INIT_WORK(&mac->pending_work, mtk_pending_work);
16351659

16361660
mac->hw_stats = devm_kzalloc(eth->dev,
16371661
sizeof(*mac->hw_stats),
@@ -1645,6 +1669,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
16451669
mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
16461670

16471671
SET_NETDEV_DEV(eth->netdev[id], eth->dev);
1672+
eth->netdev[id]->watchdog_timeo = HZ;
16481673
eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
16491674
eth->netdev[id]->base_addr = (unsigned long)eth->base;
16501675
eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
@@ -1678,10 +1703,6 @@ static int mtk_probe(struct platform_device *pdev)
16781703
struct mtk_eth *eth;
16791704
int err;
16801705

1681-
err = device_reset(&pdev->dev);
1682-
if (err)
1683-
return err;
1684-
16851706
match = of_match_device(of_mtk_match, &pdev->dev);
16861707
soc = (struct mtk_soc_data *)match->data;
16871708

@@ -1736,6 +1757,7 @@ static int mtk_probe(struct platform_device *pdev)
17361757

17371758
eth->dev = &pdev->dev;
17381759
eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
1760+
INIT_WORK(&eth->pending_work, mtk_pending_work);
17391761

17401762
err = mtk_hw_init(eth);
17411763
if (err)

drivers/net/ethernet/mediatek/mtk_eth_soc.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ struct mtk_rx_ring {
363363
* @clk_gp1: The gmac1 clock
364364
* @clk_gp2: The gmac2 clock
365365
* @mii_bus: If there is a bus we need to create an instance for it
366+
* @pending_work: The workqueue used to reset the dma ring
366367
*/
367368

368369
struct mtk_eth {
@@ -389,6 +390,7 @@ struct mtk_eth {
389390
struct clk *clk_gp1;
390391
struct clk *clk_gp2;
391392
struct mii_bus *mii_bus;
393+
struct work_struct pending_work;
392394
};
393395

394396
/* struct mtk_mac - the structure that holds the info about the MACs of the
@@ -398,15 +400,13 @@ struct mtk_eth {
398400
* @hw: Backpointer to our main datastruture
399401
* @hw_stats: Packet statistics counter
400402
* @phy_dev: The attached PHY if available
401-
* @pending_work: The workqueue used to reset the dma ring
402403
*/
403404
struct mtk_mac {
404405
int id;
405406
struct device_node *of_node;
406407
struct mtk_eth *hw;
407408
struct mtk_hw_stats *hw_stats;
408409
struct phy_device *phy_dev;
409-
struct work_struct pending_work;
410410
};
411411

412412
/* the struct describing the SoC. these are declared in the soc_xyz.c files */

0 commit comments

Comments
 (0)