Skip to content

Commit bafed3f

Browse files
Kalesh AP authored and davem330 committed
bnxt_en: implement hw health reporter
This reporter reports NVM errors, which are non-fatal. When we receive NVM error events, we report them through this new hw health reporter.
Reviewed-by: Edwin Peer <[email protected]>
Signed-off-by: Kalesh AP <[email protected]>
Signed-off-by: Michael Chan <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent f16a916 commit bafed3f

File tree

4 files changed

+126
-0
lines changed

4 files changed

+126
-0
lines changed

drivers/net/ethernet/broadcom/bnxt/bnxt.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2061,6 +2061,22 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
20612061
case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD:
20622062
netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n");
20632063
break;
2064+
case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM: {
2065+
struct bnxt_hw_health *hw_health = &bp->hw_health;
2066+
2067+
hw_health->nvm_err_address = EVENT_DATA2_NVM_ERR_ADDR(data2);
2068+
if (EVENT_DATA1_NVM_ERR_TYPE_WRITE(data1)) {
2069+
hw_health->synd = BNXT_HW_STATUS_NVM_WRITE_ERR;
2070+
hw_health->nvm_write_errors++;
2071+
} else if (EVENT_DATA1_NVM_ERR_TYPE_ERASE(data1)) {
2072+
hw_health->synd = BNXT_HW_STATUS_NVM_ERASE_ERR;
2073+
hw_health->nvm_erase_errors++;
2074+
} else {
2075+
hw_health->synd = BNXT_HW_STATUS_NVM_UNKNOWN_ERR;
2076+
}
2077+
set_bit(BNXT_FW_NVM_ERR_SP_EVENT, &bp->sp_event);
2078+
break;
2079+
}
20642080
default:
20652081
netdev_err(bp->dev, "FW reported unknown error type %u\n",
20662082
err_type);
@@ -11887,6 +11903,9 @@ static void bnxt_sp_task(struct work_struct *work)
1188711903
if (test_and_clear_bit(BNXT_FW_ECHO_REQUEST_SP_EVENT, &bp->sp_event))
1188811904
bnxt_fw_echo_reply(bp);
1188911905

11906+
if (test_and_clear_bit(BNXT_FW_NVM_ERR_SP_EVENT, &bp->sp_event))
11907+
bnxt_devlink_health_hw_report(bp);
11908+
1189011909
/* These functions below will clear BNXT_STATE_IN_SP_TASK. They
1189111910
* must be the last functions to be called before exiting.
1189211911
*/

drivers/net/ethernet/broadcom/bnxt/bnxt.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,21 @@ struct rx_tpa_end_cmp_ext {
516516
ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_MASK) >>\
517517
ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_SFT)
518518

519+
/* Mask and shift the NVM error address field out of event data2 of an
 * NVM error report async event.
 */
#define EVENT_DATA2_NVM_ERR_ADDR(data2) \
	(((data2) & \
	ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_MASK) >>\
	ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_SFT)

/* Non-zero when the NVM error type field of event data1 equals the
 * WRITE error type.
 */
#define EVENT_DATA1_NVM_ERR_TYPE_WRITE(data1) \
	(((data1) & \
	ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_MASK) ==\
	ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_WRITE)

/* Non-zero when the NVM error type field of event data1 equals the
 * ERASE error type.
 */
#define EVENT_DATA1_NVM_ERR_TYPE_ERASE(data1) \
	(((data1) & \
	ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_MASK) ==\
	ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE)
533+
519534
struct nqe_cn {
520535
__le16 type;
521536
#define NQ_CN_TYPE_MASK 0x3fUL
@@ -1528,6 +1543,21 @@ struct bnxt_ctx_mem_info {
15281543
struct bnxt_mem_init mem_init[BNXT_CTX_MEM_INIT_MAX];
15291544
};
15301545

1546+
/* Syndrome values stored in bnxt_hw_health.synd. */
enum bnxt_hw_err {
	BNXT_HW_STATUS_HEALTHY = 0x0,		/* set by the hw reporter's recover callback */
	BNXT_HW_STATUS_NVM_WRITE_ERR = 0x1,	/* NVM error event with WRITE error type */
	BNXT_HW_STATUS_NVM_ERASE_ERR = 0x2,	/* NVM error event with ERASE error type */
	BNXT_HW_STATUS_NVM_UNKNOWN_ERR = 0x3,	/* NVM error event of any other type */
};
1552+
1553+
/* State kept for the devlink "hw" health reporter. The NVM error fields
 * are updated when an NVM error report async event is handled.
 */
struct bnxt_hw_health {
	u32 nvm_err_address;	/* EVENT_DATA2_NVM_ERR_ADDR() of the latest NVM error event */
	u32 nvm_write_errors;	/* incremented for each NVM write error event */
	u32 nvm_erase_errors;	/* incremented for each NVM erase error event */
	u8 synd;		/* latest syndrome, an enum bnxt_hw_err value */
	struct devlink_health_reporter *hw_reporter;	/* NULL when no reporter exists */
};
1560+
15311561
enum bnxt_health_severity {
15321562
SEVERITY_NORMAL = 0,
15331563
SEVERITY_WARNING,
@@ -2045,6 +2075,7 @@ struct bnxt {
20452075
#define BNXT_FW_EXCEPTION_SP_EVENT 19
20462076
#define BNXT_LINK_CFG_CHANGE_SP_EVENT 21
20472077
#define BNXT_FW_ECHO_REQUEST_SP_EVENT 23
2078+
#define BNXT_FW_NVM_ERR_SP_EVENT 25
20482079

20492080
struct delayed_work fw_reset_task;
20502081
int fw_reset_state;
@@ -2145,6 +2176,8 @@ struct bnxt {
21452176
struct dentry *debugfs_pdev;
21462177
struct device *hwmon_dev;
21472178
enum board_idx board_idx;
2179+
2180+
struct bnxt_hw_health hw_health;
21482181
};
21492182

21502183
#define BNXT_NUM_RX_RING_STATS 8

drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,69 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
241241
.recover = bnxt_fw_recover,
242242
};
243243

244+
static int bnxt_hw_recover(struct devlink_health_reporter *reporter,
245+
void *priv_ctx,
246+
struct netlink_ext_ack *extack)
247+
{
248+
struct bnxt *bp = devlink_health_reporter_priv(reporter);
249+
struct bnxt_hw_health *hw_health = &bp->hw_health;
250+
251+
hw_health->synd = BNXT_HW_STATUS_HEALTHY;
252+
return 0;
253+
}
254+
255+
/* Map a hw health syndrome (enum bnxt_hw_err value) to a human-readable
 * string. Values outside the enum yield "unknown hw error".
 */
static const char *hw_err_str(u8 synd)
{
	const char *desc = "unknown hw error";

	switch (synd) {
	case BNXT_HW_STATUS_HEALTHY:
		desc = "healthy";
		break;
	case BNXT_HW_STATUS_NVM_WRITE_ERR:
		desc = "nvm write error";
		break;
	case BNXT_HW_STATUS_NVM_ERASE_ERR:
		desc = "nvm erase error";
		break;
	case BNXT_HW_STATUS_NVM_UNKNOWN_ERR:
		desc = "unrecognized nvm error";
		break;
	default:
		break;
	}

	return desc;
}
270+
271+
static int bnxt_hw_diagnose(struct devlink_health_reporter *reporter,
272+
struct devlink_fmsg *fmsg,
273+
struct netlink_ext_ack *extack)
274+
{
275+
struct bnxt *bp = devlink_health_reporter_priv(reporter);
276+
struct bnxt_hw_health *h = &bp->hw_health;
277+
int rc;
278+
279+
rc = devlink_fmsg_string_pair_put(fmsg, "Status", hw_err_str(h->synd));
280+
if (rc)
281+
return rc;
282+
rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_write_errors", h->nvm_write_errors);
283+
if (rc)
284+
return rc;
285+
rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_erase_errors", h->nvm_erase_errors);
286+
if (rc)
287+
return rc;
288+
return 0;
289+
}
290+
291+
void bnxt_devlink_health_hw_report(struct bnxt *bp)
292+
{
293+
struct bnxt_hw_health *hw_health = &bp->hw_health;
294+
295+
netdev_warn(bp->dev, "%s reported at address 0x%x\n", hw_err_str(hw_health->synd),
296+
hw_health->nvm_err_address);
297+
298+
devlink_health_report(hw_health->hw_reporter, hw_err_str(hw_health->synd), NULL);
299+
}
300+
301+
/* Callbacks for the devlink health reporter named "hw", which is used to
 * report NVM errors.
 */
static const struct devlink_health_reporter_ops bnxt_dl_hw_reporter_ops = {
	.name = "hw",
	.diagnose = bnxt_hw_diagnose,
	.recover = bnxt_hw_recover,
};
306+
244307
static struct devlink_health_reporter *
245308
__bnxt_dl_reporter_create(struct bnxt *bp,
246309
const struct devlink_health_reporter_ops *ops)
@@ -260,6 +323,10 @@ __bnxt_dl_reporter_create(struct bnxt *bp,
260323
void bnxt_dl_fw_reporters_create(struct bnxt *bp)
261324
{
262325
struct bnxt_fw_health *fw_health = bp->fw_health;
326+
struct bnxt_hw_health *hw_health = &bp->hw_health;
327+
328+
if (!hw_health->hw_reporter)
329+
hw_health->hw_reporter = __bnxt_dl_reporter_create(bp, &bnxt_dl_hw_reporter_ops);
263330

264331
if (fw_health && !fw_health->fw_reporter)
265332
fw_health->fw_reporter = __bnxt_dl_reporter_create(bp, &bnxt_dl_fw_reporter_ops);
@@ -268,6 +335,12 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
268335
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
269336
{
270337
struct bnxt_fw_health *fw_health = bp->fw_health;
338+
struct bnxt_hw_health *hw_health = &bp->hw_health;
339+
340+
if (hw_health->hw_reporter) {
341+
devlink_health_reporter_destroy(hw_health->hw_reporter);
342+
hw_health->hw_reporter = NULL;
343+
}
271344

272345
if (fw_health && fw_health->fw_reporter) {
273346
devlink_health_reporter_destroy(fw_health->fw_reporter);

drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ enum bnxt_dl_version_type {
7474
void bnxt_devlink_health_fw_report(struct bnxt *bp);
7575
void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy);
7676
void bnxt_dl_health_fw_recovery_done(struct bnxt *bp);
77+
void bnxt_devlink_health_hw_report(struct bnxt *bp);
7778
void bnxt_dl_fw_reporters_create(struct bnxt *bp);
7879
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp);
7980
int bnxt_dl_register(struct bnxt *bp);

0 commit comments

Comments
 (0)