Skip to content

Commit 6ae4e73

Browse files
shijujose4davem330
authored and committed
net: hns3: Add PCIe AER error recovery
This patch adds error recovery for HNS hardware errors. Signed-off-by: Shiju Jose <[email protected]> Signed-off-by: Salil Mehta <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 5a9f0ea commit 6ae4e73

File tree

4 files changed

+35
-7
lines changed

4 files changed

+35
-7
lines changed

drivers/net/ethernet/hisilicon/hns3/hnae3.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ struct hnae3_ae_ops {
402402
int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
403403
u16 vlan, u8 qos, __be16 proto);
404404
int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
405-
void (*reset_event)(struct hnae3_handle *handle);
405+
void (*reset_event)(struct pci_dev *pdev, struct hnae3_handle *handle);
406406
void (*get_channels)(struct hnae3_handle *handle,
407407
struct ethtool_channels *ch);
408408
void (*get_tqps_and_rss_info)(struct hnae3_handle *h,

drivers/net/ethernet/hisilicon/hns3/hns3_enet.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/ipv6.h>
1010
#include <linux/module.h>
1111
#include <linux/pci.h>
12+
#include <linux/aer.h>
1213
#include <linux/skbuff.h>
1314
#include <linux/sctp.h>
1415
#include <linux/vermagic.h>
@@ -1613,7 +1614,7 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
16131614

16141615
/* request the reset */
16151616
if (h->ae_algo->ops->reset_event)
1616-
h->ae_algo->ops->reset_event(h);
1617+
h->ae_algo->ops->reset_event(h->pdev, h);
16171618
}
16181619

16191620
static const struct net_device_ops hns3_nic_netdev_ops = {
@@ -1796,8 +1797,25 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev,
17961797
return ret;
17971798
}
17981799

1800+
static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev)
1801+
{
1802+
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
1803+
struct device *dev = &pdev->dev;
1804+
1805+
dev_info(dev, "requesting reset due to PCI error\n");
1806+
1807+
/* request the reset */
1808+
if (ae_dev->ops->reset_event) {
1809+
ae_dev->ops->reset_event(pdev, NULL);
1810+
return PCI_ERS_RESULT_RECOVERED;
1811+
}
1812+
1813+
return PCI_ERS_RESULT_DISCONNECT;
1814+
}
1815+
17991816
/* PCI error-recovery callbacks of this driver: hns3_error_detected handles
 * the error_detected stage and hns3_slot_reset handles the slot_reset stage.
 * NOTE(review): presumably referenced from hns3_driver's err_handler field,
 * which is outside this view - confirm.
 */
static const struct pci_error_handlers hns3_err_handler = {
18001817
.error_detected = hns3_error_detected,
1818+
.slot_reset = hns3_slot_reset,
18011819
};
18021820

18031821
static struct pci_driver hns3_driver = {

drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,12 +2489,18 @@ static void hclge_reset(struct hclge_dev *hdev)
24892489
ae_dev->reset_type = HNAE3_NONE_RESET;
24902490
}
24912491

2492-
static void hclge_reset_event(struct hnae3_handle *handle)
2492+
static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
24932493
{
2494-
struct hclge_vport *vport = hclge_get_vport(handle);
2495-
struct hclge_dev *hdev = vport->back;
2494+
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
2495+
struct hclge_dev *hdev = ae_dev->priv;
24962496

2497-
/* check if this is a new reset request and we are not here just because
2497+
/* We might end up getting called because of the two cases below:
2498+
* 1. A recoverable error was conveyed through APEI and the only way to bring
2499+
* normalcy is to reset.
2500+
* 2. A new reset request from the stack due to timeout
2501+
*
2502+
* For the first case, the error event might not have an ae handle available.
2503+
* Check if this is a new reset request and we are not here just because
24982504
* last reset attempt did not succeed and watchdog hit us again. We will
24992505
* know this if last reset request did not occur very recently (watchdog
25002506
* timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz)
@@ -2503,6 +2509,9 @@ static void hclge_reset_event(struct hnae3_handle *handle)
25032509
* want to make sure we throttle the reset request. Therefore, we will
25042510
* not allow it again before 3*HZ times.
25052511
*/
2512+
if (!handle)
2513+
handle = &hdev->vport[0].nic;
2514+
25062515
if (time_before(jiffies, (handle->last_reset_time + 3 * HZ)))
25072516
return;
25082517
else if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))

drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1214,7 +1214,8 @@ static int hclgevf_do_reset(struct hclgevf_dev *hdev)
12141214
return status;
12151215
}
12161216

1217-
static void hclgevf_reset_event(struct hnae3_handle *handle)
1217+
static void hclgevf_reset_event(struct pci_dev *pdev,
1218+
struct hnae3_handle *handle)
12181219
{
12191220
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
12201221

0 commit comments

Comments
 (0)