|
3 | 3 |
|
4 | 4 | #include "hclge_err.h"
|
5 | 5 |
|
| 6 | +static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { |
| 7 | + { .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" }, |
| 8 | + { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" }, |
| 9 | + { .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" }, |
| 10 | + { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" }, |
| 11 | + { .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" }, |
| 12 | + { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" }, |
| 13 | + { .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" }, |
| 14 | + { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" }, |
| 15 | + { .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" }, |
| 16 | + { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" }, |
| 17 | + { .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" }, |
| 18 | + { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" }, |
| 19 | + { .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" }, |
| 20 | + { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" }, |
| 21 | + { .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" }, |
| 22 | + { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" }, |
| 23 | + { /* sentinel */ } |
| 24 | +}; |
| 25 | + |
| 26 | +static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = { |
| 27 | + { .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" }, |
| 28 | + { .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" }, |
| 29 | + { /* sentinel */ } |
| 30 | +}; |
| 31 | + |
| 32 | +static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { |
| 33 | + { .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" }, |
| 34 | + { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" }, |
| 35 | + { .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" }, |
| 36 | + { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" }, |
| 37 | + { .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" }, |
| 38 | + { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" }, |
| 39 | + { .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" }, |
| 40 | + { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" }, |
| 41 | + { .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" }, |
| 42 | + { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" }, |
| 43 | + { .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" }, |
| 44 | + { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" }, |
| 45 | + { .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" }, |
| 46 | + { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" }, |
| 47 | + { .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" }, |
| 48 | + { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" }, |
| 49 | + { /* sentinel */ } |
| 50 | +}; |
| 51 | + |
| 52 | +static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = { |
| 53 | + { .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" }, |
| 54 | + { .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" }, |
| 55 | + { .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" }, |
| 56 | + { .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" }, |
| 57 | + { .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" }, |
| 58 | + { .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" }, |
| 59 | + { .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" }, |
| 60 | + { .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" }, |
| 61 | + { .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" }, |
| 62 | + { .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" }, |
| 63 | + { .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" }, |
| 64 | + { .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" }, |
| 65 | + { .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" }, |
| 66 | + { .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" }, |
| 67 | + { .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" }, |
| 68 | + { .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" }, |
| 69 | + { /* sentinel */ } |
| 70 | +}; |
| 71 | + |
| 72 | +static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { |
| 73 | + { .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" }, |
| 74 | + { .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" }, |
| 75 | + { .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" }, |
| 76 | + { .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" }, |
| 77 | + { .int_msk = BIT(4), .msg = "tx_que_scan_int_ecc_1bit_err" }, |
| 78 | + { .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" }, |
| 79 | + { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" }, |
| 80 | + { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" }, |
| 81 | + { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" }, |
| 82 | + { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" }, |
| 83 | + { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" }, |
| 84 | + { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" }, |
| 85 | + { /* sentinel */ } |
| 86 | +}; |
| 87 | + |
| 88 | +static void hclge_log_error(struct device *dev, |
| 89 | + const struct hclge_hw_error *err_list, |
| 90 | + u32 err_sts) |
| 91 | +{ |
| 92 | + const struct hclge_hw_error *err; |
| 93 | + int i = 0; |
| 94 | + |
| 95 | + while (err_list[i].msg) { |
| 96 | + err = &err_list[i]; |
| 97 | + if (!(err->int_msk & err_sts)) { |
| 98 | + i++; |
| 99 | + continue; |
| 100 | + } |
| 101 | + dev_warn(dev, "%s [error status=0x%x] found\n", |
| 102 | + err->msg, err_sts); |
| 103 | + i++; |
| 104 | + } |
| 105 | +} |
| 106 | + |
| 107 | +/* hclge_cmd_query_error: read the error information |
| 108 | + * @hdev: pointer to struct hclge_dev |
| 109 | + * @desc: descriptor for describing the command |
| 110 | + * @cmd: command opcode |
| 111 | + * @flag: flag for extended command structure |
| 112 | + * @w_num: offset for setting the read interrupt type. |
| 113 | + * @int_type: select which type of the interrupt for which the error |
| 114 | + * info will be read(RAS-CE/RAS-NFE/RAS-FE etc). |
| 115 | + * |
| 116 | + * This function query the error info from hw register/s using command |
| 117 | + */ |
| 118 | +static int hclge_cmd_query_error(struct hclge_dev *hdev, |
| 119 | + struct hclge_desc *desc, u32 cmd, |
| 120 | + u16 flag, u8 w_num, |
| 121 | + enum hclge_err_int_type int_type) |
| 122 | +{ |
| 123 | + struct device *dev = &hdev->pdev->dev; |
| 124 | + int num = 1; |
| 125 | + int ret; |
| 126 | + |
| 127 | + hclge_cmd_setup_basic_desc(&desc[0], cmd, true); |
| 128 | + if (flag) { |
| 129 | + desc[0].flag |= cpu_to_le16(flag); |
| 130 | + hclge_cmd_setup_basic_desc(&desc[1], cmd, true); |
| 131 | + num = 2; |
| 132 | + } |
| 133 | + if (w_num) |
| 134 | + desc[0].data[w_num] = cpu_to_le32(int_type); |
| 135 | + |
| 136 | + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); |
| 137 | + if (ret) |
| 138 | + dev_err(dev, "query error cmd failed (%d)\n", ret); |
| 139 | + |
| 140 | + return ret; |
| 141 | +} |
| 142 | + |
| 143 | +/* hclge_cmd_clear_error: clear the error status |
| 144 | + * @hdev: pointer to struct hclge_dev |
| 145 | + * @desc: descriptor for describing the command |
| 146 | + * @desc_src: prefilled descriptor from the previous command for reusing |
| 147 | + * @cmd: command opcode |
| 148 | + * @flag: flag for extended command structure |
| 149 | + * |
| 150 | + * This function clear the error status in the hw register/s using command |
| 151 | + */ |
| 152 | +static int hclge_cmd_clear_error(struct hclge_dev *hdev, |
| 153 | + struct hclge_desc *desc, |
| 154 | + struct hclge_desc *desc_src, |
| 155 | + u32 cmd, u16 flag) |
| 156 | +{ |
| 157 | + struct device *dev = &hdev->pdev->dev; |
| 158 | + int num = 1; |
| 159 | + int ret, i; |
| 160 | + |
| 161 | + if (cmd) { |
| 162 | + hclge_cmd_setup_basic_desc(&desc[0], cmd, false); |
| 163 | + if (flag) { |
| 164 | + desc[0].flag |= cpu_to_le16(flag); |
| 165 | + hclge_cmd_setup_basic_desc(&desc[1], cmd, false); |
| 166 | + num = 2; |
| 167 | + } |
| 168 | + if (desc_src) { |
| 169 | + for (i = 0; i < 6; i++) { |
| 170 | + desc[0].data[i] = desc_src[0].data[i]; |
| 171 | + if (flag) |
| 172 | + desc[1].data[i] = desc_src[1].data[i]; |
| 173 | + } |
| 174 | + } |
| 175 | + } else { |
| 176 | + hclge_cmd_reuse_desc(&desc[0], false); |
| 177 | + if (flag) { |
| 178 | + desc[0].flag |= cpu_to_le16(flag); |
| 179 | + hclge_cmd_reuse_desc(&desc[1], false); |
| 180 | + num = 2; |
| 181 | + } |
| 182 | + } |
| 183 | + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); |
| 184 | + if (ret) |
| 185 | + dev_err(dev, "clear error cmd failed (%d)\n", ret); |
| 186 | + |
| 187 | + return ret; |
| 188 | +} |
| 189 | + |
| 190 | +static int hclge_enable_common_error(struct hclge_dev *hdev, bool en) |
| 191 | +{ |
| 192 | + struct device *dev = &hdev->pdev->dev; |
| 193 | + struct hclge_desc desc[2]; |
| 194 | + int ret; |
| 195 | + |
| 196 | + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); |
| 197 | + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); |
| 198 | + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); |
| 199 | + |
| 200 | + if (en) { |
| 201 | + /* enable COMMON error interrupts */ |
| 202 | + desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); |
| 203 | + desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | |
| 204 | + HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); |
| 205 | + desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); |
| 206 | + desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN); |
| 207 | + desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); |
| 208 | + } else { |
| 209 | + /* disable COMMON error interrupts */ |
| 210 | + desc[0].data[0] = 0; |
| 211 | + desc[0].data[2] = 0; |
| 212 | + desc[0].data[3] = 0; |
| 213 | + desc[0].data[4] = 0; |
| 214 | + desc[0].data[5] = 0; |
| 215 | + } |
| 216 | + desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); |
| 217 | + desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | |
| 218 | + HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); |
| 219 | + desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); |
| 220 | + desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK); |
| 221 | + desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); |
| 222 | + |
| 223 | + ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); |
| 224 | + if (ret) |
| 225 | + dev_err(dev, |
| 226 | + "failed(%d) to enable/disable COMMON err interrupts\n", |
| 227 | + ret); |
| 228 | + |
| 229 | + return ret; |
| 230 | +} |
| 231 | + |
| 232 | +static void hclge_process_common_error(struct hclge_dev *hdev, |
| 233 | + enum hclge_err_int_type type) |
| 234 | +{ |
| 235 | + struct device *dev = &hdev->pdev->dev; |
| 236 | + struct hclge_desc desc[2]; |
| 237 | + u32 err_sts; |
| 238 | + int ret; |
| 239 | + |
| 240 | + /* read err sts */ |
| 241 | + ret = hclge_cmd_query_error(hdev, &desc[0], |
| 242 | + HCLGE_COMMON_ECC_INT_CFG, |
| 243 | + HCLGE_CMD_FLAG_NEXT, 0, 0); |
| 244 | + if (ret) { |
| 245 | + dev_err(dev, |
| 246 | + "failed(=%d) to query COMMON error interrupt status\n", |
| 247 | + ret); |
| 248 | + return; |
| 249 | + } |
| 250 | + |
| 251 | + /* log err */ |
| 252 | + err_sts = (le32_to_cpu(desc[0].data[0])) & HCLGE_IMP_TCM_ECC_INT_MASK; |
| 253 | + hclge_log_error(dev, &hclge_imp_tcm_ecc_int[0], err_sts); |
| 254 | + |
| 255 | + err_sts = (le32_to_cpu(desc[0].data[1])) & HCLGE_CMDQ_ECC_INT_MASK; |
| 256 | + hclge_log_error(dev, &hclge_cmdq_nic_mem_ecc_int[0], err_sts); |
| 257 | + |
| 258 | + err_sts = (le32_to_cpu(desc[0].data[1]) >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT) |
| 259 | + & HCLGE_CMDQ_ECC_INT_MASK; |
| 260 | + hclge_log_error(dev, &hclge_cmdq_rocee_mem_ecc_int[0], err_sts); |
| 261 | + |
| 262 | + if ((le32_to_cpu(desc[0].data[3])) & BIT(0)) |
| 263 | + dev_warn(dev, "imp_rd_data_poison_err found\n"); |
| 264 | + |
| 265 | + err_sts = (le32_to_cpu(desc[0].data[3]) >> HCLGE_TQP_ECC_INT_SHIFT) & |
| 266 | + HCLGE_TQP_ECC_INT_MASK; |
| 267 | + hclge_log_error(dev, &hclge_tqp_int_ecc_int[0], err_sts); |
| 268 | + |
| 269 | + err_sts = (le32_to_cpu(desc[0].data[5])) & |
| 270 | + HCLGE_IMP_ITCM4_ECC_INT_MASK; |
| 271 | + hclge_log_error(dev, &hclge_imp_itcm4_ecc_int[0], err_sts); |
| 272 | + |
| 273 | + /* clear error interrupts */ |
| 274 | + desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK); |
| 275 | + desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK | |
| 276 | + HCLGE_CMDQ_ROCEE_ECC_CLR_MASK); |
| 277 | + desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK); |
| 278 | + desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK); |
| 279 | + |
| 280 | + ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0, |
| 281 | + HCLGE_CMD_FLAG_NEXT); |
| 282 | + if (ret) |
| 283 | + dev_err(dev, |
| 284 | + "failed(%d) to clear COMMON error interrupt status\n", |
| 285 | + ret); |
| 286 | +} |
| 287 | + |
6 | 288 | static const struct hclge_hw_blk hw_blk[] = {
|
| 289 | + { .msk = BIT(5), .name = "COMMON", |
| 290 | + .enable_error = hclge_enable_common_error, |
| 291 | + .process_error = hclge_process_common_error, }, |
7 | 292 | { /* sentinel */ }
|
8 | 293 | };
|
9 | 294 |
|
|
0 commit comments