Skip to content

Commit 5cb96c2

Browse files
vburru and davem330
authored and committed
octeon_ep: add heartbeat monitor
Monitor periodic heartbeat messages from device firmware. Presence of a heartbeat indicates the device is active and running. If the heartbeat is missed for the configured interval, this indicates that the firmware has crashed and the device is unusable; in this case, the PF driver stops and uninitializes the device. Signed-off-by: Veerasenareddy Burru <[email protected]> Signed-off-by: Abhijit Ayarekar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 0718693 commit 5cb96c2

File tree

5 files changed

+66
-2
lines changed

5 files changed

+66
-2
lines changed

drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#define CTRL_MBOX_MAX_PF 128
1717
#define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF))
1818

19+
#define FW_HB_INTERVAL_IN_SECS 1
20+
#define FW_HB_MISS_COUNT 10
21+
1922
/* Names of Hardware non-queue generic interrupts */
2023
static char *cn93_non_ioq_msix_names[] = {
2124
"epf_ire_rint",
@@ -249,6 +252,10 @@ static void octep_init_config_cn93_pf(struct octep_device *oct)
249252
conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr +
250253
(0x400000ull * 7) +
251254
(link * CTRL_MBOX_SZ);
255+
256+
conf->hb_interval = FW_HB_INTERVAL_IN_SECS;
257+
conf->max_hb_miss_cnt = FW_HB_MISS_COUNT;
258+
252259
}
253260

254261
/* Setup registers for a hardware Tx Queue */
@@ -383,6 +390,8 @@ static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct)
383390
octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0);
384391
if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX)
385392
queue_work(octep_wq, &oct->ctrl_mbox_task);
393+
else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT)
394+
atomic_set(&oct->hb_miss_cnt, 0);
386395

387396
handled = true;
388397
}

drivers/net/ethernet/marvell/octeon_ep/octep_config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,5 +200,11 @@ struct octep_config {
200200

201201
/* ctrl mbox config */
202202
struct octep_ctrl_mbox_config ctrl_mbox_cfg;
203+
204+
/* Configured maximum heartbeat miss count */
205+
u32 max_hb_miss_cnt;
206+
207+
/* Configured firmware heartbeat interval in secs */
208+
u32 hb_interval;
203209
};
204210
#endif /* _OCTEP_CONFIG_H_ */

drivers/net/ethernet/marvell/octeon_ep/octep_main.c

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,37 @@ static void octep_intr_poll_task(struct work_struct *work)
901901
msecs_to_jiffies(OCTEP_INTR_POLL_TIME_MSECS));
902902
}
903903

904+
/**
905+
* octep_hb_timeout_task - work queue task to check firmware heartbeat.
906+
*
907+
* @work: pointer to hb work_struct
908+
*
909+
* Check for heartbeat miss count. Uninitialize oct device if miss count
910+
* exceeds configured max heartbeat miss count.
911+
*
912+
**/
913+
static void octep_hb_timeout_task(struct work_struct *work)
914+
{
915+
struct octep_device *oct = container_of(work, struct octep_device,
916+
hb_task.work);
917+
918+
int miss_cnt;
919+
920+
miss_cnt = atomic_inc_return(&oct->hb_miss_cnt);
921+
if (miss_cnt < oct->conf->max_hb_miss_cnt) {
922+
queue_delayed_work(octep_wq, &oct->hb_task,
923+
msecs_to_jiffies(oct->conf->hb_interval * 1000));
924+
return;
925+
}
926+
927+
dev_err(&oct->pdev->dev, "Missed %u heartbeats. Uninitializing\n",
928+
miss_cnt);
929+
rtnl_lock();
930+
if (netif_running(oct->netdev))
931+
octep_stop(oct->netdev);
932+
rtnl_unlock();
933+
}
934+
904935
/**
905936
* octep_ctrl_mbox_task - work queue task to handle ctrl mbox messages.
906937
*
@@ -938,7 +969,7 @@ static const char *octep_devid_to_str(struct octep_device *oct)
938969
int octep_device_setup(struct octep_device *oct)
939970
{
940971
struct pci_dev *pdev = oct->pdev;
941-
int i;
972+
int i, ret;
942973

943974
/* allocate memory for oct->conf */
944975
oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL);
@@ -973,7 +1004,15 @@ int octep_device_setup(struct octep_device *oct)
9731004

9741005
oct->pkind = CFG_GET_IQ_PKIND(oct->conf);
9751006

976-
return octep_ctrl_net_init(oct);
1007+
ret = octep_ctrl_net_init(oct);
1008+
if (ret)
1009+
return ret;
1010+
1011+
atomic_set(&oct->hb_miss_cnt, 0);
1012+
INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task);
1013+
queue_delayed_work(octep_wq, &oct->hb_task,
1014+
msecs_to_jiffies(oct->conf->hb_interval * 1000));
1015+
return 0;
9771016

9781017
unsupported_dev:
9791018
for (i = 0; i < OCTEP_MMIO_REGIONS; i++)
@@ -1002,6 +1041,7 @@ static void octep_device_cleanup(struct octep_device *oct)
10021041
}
10031042

10041043
octep_ctrl_net_uninit(oct);
1044+
cancel_delayed_work_sync(&oct->hb_task);
10051045

10061046
oct->hw_ops.soft_reset(oct);
10071047
for (i = 0; i < OCTEP_MMIO_REGIONS; i++) {

drivers/net/ethernet/marvell/octeon_ep/octep_main.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,13 @@ struct octep_device {
280280
bool poll_non_ioq_intr;
281281
/* Work entry to poll non-ioq interrupts */
282282
struct delayed_work intr_poll_task;
283+
284+
/* Firmware heartbeat timer */
285+
struct timer_list hb_timer;
286+
/* Firmware heartbeat miss count tracked by timer */
287+
atomic_t hb_miss_cnt;
288+
/* Task to reset device on heartbeat miss */
289+
struct delayed_work hb_task;
283290
};
284291

285292
static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct)

drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,5 +367,7 @@
367367

368368
/* bit 0 for control mbox interrupt */
369369
#define CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0)
370+
/* bit 1 for firmware heartbeat interrupt */
371+
#define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1)
370372

371373
#endif /* _OCTEP_REGS_CN9K_PF_H_ */

0 commit comments

Comments
 (0)