|
1 | 1 | // SPDX-License-Identifier: GPL-2.0-only
|
| 2 | +#include <linux/ras.h> |
2 | 3 | #include "amd64_edac.h"
|
3 | 4 | #include <asm/amd_nb.h>
|
4 | 5 |
|
@@ -1051,281 +1052,6 @@ static int fixup_node_id(int node_id, struct mce *m)
|
1051 | 1052 | return nid - gpu_node_map.base_node_id + 1;
|
1052 | 1053 | }
|
1053 | 1054 |
|
1054 |
| -/* Protect the PCI config register pairs used for DF indirect access. */ |
1055 |
| -static DEFINE_MUTEX(df_indirect_mutex); |
1056 |
| - |
1057 |
| -/* |
1058 |
| - * Data Fabric Indirect Access uses FICAA/FICAD. |
1059 |
| - * |
1060 |
| - * Fabric Indirect Configuration Access Address (FICAA): Constructed based |
1061 |
| - * on the device's Instance Id and the PCI function and register offset of |
1062 |
| - * the desired register. |
1063 |
| - * |
1064 |
| - * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO |
1065 |
| - * and FICAD HI registers but so far we only need the LO register. |
1066 |
| - * |
1067 |
| - * Use Instance Id 0xFF to indicate a broadcast read. |
1068 |
| - */ |
1069 |
| -#define DF_BROADCAST 0xFF |
1070 |
| -static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) |
1071 |
| -{ |
1072 |
| - struct pci_dev *F4; |
1073 |
| - u32 ficaa; |
1074 |
| - int err = -ENODEV; |
1075 |
| - |
1076 |
| - if (node >= amd_nb_num()) |
1077 |
| - goto out; |
1078 |
| - |
1079 |
| - F4 = node_to_amd_nb(node)->link; |
1080 |
| - if (!F4) |
1081 |
| - goto out; |
1082 |
| - |
1083 |
| - ficaa = (instance_id == DF_BROADCAST) ? 0 : 1; |
1084 |
| - ficaa |= reg & 0x3FC; |
1085 |
| - ficaa |= (func & 0x7) << 11; |
1086 |
| - ficaa |= instance_id << 16; |
1087 |
| - |
1088 |
| - mutex_lock(&df_indirect_mutex); |
1089 |
| - |
1090 |
| - err = pci_write_config_dword(F4, 0x5C, ficaa); |
1091 |
| - if (err) { |
1092 |
| - pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); |
1093 |
| - goto out_unlock; |
1094 |
| - } |
1095 |
| - |
1096 |
| - err = pci_read_config_dword(F4, 0x98, lo); |
1097 |
| - if (err) |
1098 |
| - pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); |
1099 |
| - |
1100 |
| -out_unlock: |
1101 |
| - mutex_unlock(&df_indirect_mutex); |
1102 |
| - |
1103 |
| -out: |
1104 |
| - return err; |
1105 |
| -} |
1106 |
| - |
1107 |
| -static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) |
1108 |
| -{ |
1109 |
| - return __df_indirect_read(node, func, reg, instance_id, lo); |
1110 |
| -} |
1111 |
| - |
1112 |
| -static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) |
1113 |
| -{ |
1114 |
| - return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); |
1115 |
| -} |
1116 |
| - |
1117 |
| -struct addr_ctx { |
1118 |
| - u64 ret_addr; |
1119 |
| - u32 tmp; |
1120 |
| - u16 nid; |
1121 |
| - u8 inst_id; |
1122 |
| -}; |
1123 |
| - |
1124 |
| -static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) |
1125 |
| -{ |
1126 |
| - u64 dram_base_addr, dram_limit_addr, dram_hole_base; |
1127 |
| - |
1128 |
| - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; |
1129 |
| - u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; |
1130 |
| - u8 intlv_addr_sel, intlv_addr_bit; |
1131 |
| - u8 num_intlv_bits, hashed_bit; |
1132 |
| - u8 lgcy_mmio_hole_en, base = 0; |
1133 |
| - u8 cs_mask, cs_id = 0; |
1134 |
| - bool hash_enabled = false; |
1135 |
| - |
1136 |
| - struct addr_ctx ctx; |
1137 |
| - |
1138 |
| - memset(&ctx, 0, sizeof(ctx)); |
1139 |
| - |
1140 |
| - /* Start from the normalized address */ |
1141 |
| - ctx.ret_addr = norm_addr; |
1142 |
| - |
1143 |
| - ctx.nid = nid; |
1144 |
| - ctx.inst_id = umc; |
1145 |
| - |
1146 |
| - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ |
1147 |
| - if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) |
1148 |
| - goto out_err; |
1149 |
| - |
1150 |
| - /* Remove HiAddrOffset from normalized address, if enabled: */ |
1151 |
| - if (ctx.tmp & BIT(0)) { |
1152 |
| - u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8; |
1153 |
| - |
1154 |
| - if (norm_addr >= hi_addr_offset) { |
1155 |
| - ctx.ret_addr -= hi_addr_offset; |
1156 |
| - base = 1; |
1157 |
| - } |
1158 |
| - } |
1159 |
| - |
1160 |
| - /* Read D18F0x110 (DramBaseAddress). */ |
1161 |
| - if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp)) |
1162 |
| - goto out_err; |
1163 |
| - |
1164 |
| - /* Check if address range is valid. */ |
1165 |
| - if (!(ctx.tmp & BIT(0))) { |
1166 |
| - pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", |
1167 |
| - __func__, ctx.tmp); |
1168 |
| - goto out_err; |
1169 |
| - } |
1170 |
| - |
1171 |
| - lgcy_mmio_hole_en = ctx.tmp & BIT(1); |
1172 |
| - intlv_num_chan = (ctx.tmp >> 4) & 0xF; |
1173 |
| - intlv_addr_sel = (ctx.tmp >> 8) & 0x7; |
1174 |
| - dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16; |
1175 |
| - |
1176 |
| - /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ |
1177 |
| - if (intlv_addr_sel > 3) { |
1178 |
| - pr_err("%s: Invalid interleave address select %d.\n", |
1179 |
| - __func__, intlv_addr_sel); |
1180 |
| - goto out_err; |
1181 |
| - } |
1182 |
| - |
1183 |
| - /* Read D18F0x114 (DramLimitAddress). */ |
1184 |
| - if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) |
1185 |
| - goto out_err; |
1186 |
| - |
1187 |
| - intlv_num_sockets = (ctx.tmp >> 8) & 0x1; |
1188 |
| - intlv_num_dies = (ctx.tmp >> 10) & 0x3; |
1189 |
| - dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); |
1190 |
| - |
1191 |
| - intlv_addr_bit = intlv_addr_sel + 8; |
1192 |
| - |
1193 |
| - /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ |
1194 |
| - switch (intlv_num_chan) { |
1195 |
| - case 0: intlv_num_chan = 0; break; |
1196 |
| - case 1: intlv_num_chan = 1; break; |
1197 |
| - case 3: intlv_num_chan = 2; break; |
1198 |
| - case 5: intlv_num_chan = 3; break; |
1199 |
| - case 7: intlv_num_chan = 4; break; |
1200 |
| - |
1201 |
| - case 8: intlv_num_chan = 1; |
1202 |
| - hash_enabled = true; |
1203 |
| - break; |
1204 |
| - default: |
1205 |
| - pr_err("%s: Invalid number of interleaved channels %d.\n", |
1206 |
| - __func__, intlv_num_chan); |
1207 |
| - goto out_err; |
1208 |
| - } |
1209 |
| - |
1210 |
| - num_intlv_bits = intlv_num_chan; |
1211 |
| - |
1212 |
| - if (intlv_num_dies > 2) { |
1213 |
| - pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", |
1214 |
| - __func__, intlv_num_dies); |
1215 |
| - goto out_err; |
1216 |
| - } |
1217 |
| - |
1218 |
| - num_intlv_bits += intlv_num_dies; |
1219 |
| - |
1220 |
| - /* Add a bit if sockets are interleaved. */ |
1221 |
| - num_intlv_bits += intlv_num_sockets; |
1222 |
| - |
1223 |
| - /* Assert num_intlv_bits <= 4 */ |
1224 |
| - if (num_intlv_bits > 4) { |
1225 |
| - pr_err("%s: Invalid interleave bits %d.\n", |
1226 |
| - __func__, num_intlv_bits); |
1227 |
| - goto out_err; |
1228 |
| - } |
1229 |
| - |
1230 |
| - if (num_intlv_bits > 0) { |
1231 |
| - u64 temp_addr_x, temp_addr_i, temp_addr_y; |
1232 |
| - u8 die_id_bit, sock_id_bit, cs_fabric_id; |
1233 |
| - |
1234 |
| - /* |
1235 |
| - * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. |
1236 |
| - * This is the fabric id for this coherent slave. Use |
1237 |
| - * umc/channel# as instance id of the coherent slave |
1238 |
| - * for FICAA. |
1239 |
| - */ |
1240 |
| - if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) |
1241 |
| - goto out_err; |
1242 |
| - |
1243 |
| - cs_fabric_id = (ctx.tmp >> 8) & 0xFF; |
1244 |
| - die_id_bit = 0; |
1245 |
| - |
1246 |
| - /* If interleaved over more than 1 channel: */ |
1247 |
| - if (intlv_num_chan) { |
1248 |
| - die_id_bit = intlv_num_chan; |
1249 |
| - cs_mask = (1 << die_id_bit) - 1; |
1250 |
| - cs_id = cs_fabric_id & cs_mask; |
1251 |
| - } |
1252 |
| - |
1253 |
| - sock_id_bit = die_id_bit; |
1254 |
| - |
1255 |
| - /* Read D18F1x208 (SystemFabricIdMask). */ |
1256 |
| - if (intlv_num_dies || intlv_num_sockets) |
1257 |
| - if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp)) |
1258 |
| - goto out_err; |
1259 |
| - |
1260 |
| - /* If interleaved over more than 1 die. */ |
1261 |
| - if (intlv_num_dies) { |
1262 |
| - sock_id_bit = die_id_bit + intlv_num_dies; |
1263 |
| - die_id_shift = (ctx.tmp >> 24) & 0xF; |
1264 |
| - die_id_mask = (ctx.tmp >> 8) & 0xFF; |
1265 |
| - |
1266 |
| - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; |
1267 |
| - } |
1268 |
| - |
1269 |
| - /* If interleaved over more than 1 socket. */ |
1270 |
| - if (intlv_num_sockets) { |
1271 |
| - socket_id_shift = (ctx.tmp >> 28) & 0xF; |
1272 |
| - socket_id_mask = (ctx.tmp >> 16) & 0xFF; |
1273 |
| - |
1274 |
| - cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; |
1275 |
| - } |
1276 |
| - |
1277 |
| - /* |
1278 |
| - * The pre-interleaved address consists of XXXXXXIIIYYYYY |
1279 |
| - * where III is the ID for this CS, and XXXXXXYYYYY are the |
1280 |
| - * address bits from the post-interleaved address. |
1281 |
| - * "num_intlv_bits" has been calculated to tell us how many "I" |
1282 |
| - * bits there are. "intlv_addr_bit" tells us how many "Y" bits |
1283 |
| - * there are (where "I" starts). |
1284 |
| - */ |
1285 |
| - temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0); |
1286 |
| - temp_addr_i = (cs_id << intlv_addr_bit); |
1287 |
| - temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; |
1288 |
| - ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; |
1289 |
| - } |
1290 |
| - |
1291 |
| - /* Add dram base address */ |
1292 |
| - ctx.ret_addr += dram_base_addr; |
1293 |
| - |
1294 |
| - /* If legacy MMIO hole enabled */ |
1295 |
| - if (lgcy_mmio_hole_en) { |
1296 |
| - if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp)) |
1297 |
| - goto out_err; |
1298 |
| - |
1299 |
| - dram_hole_base = ctx.tmp & GENMASK(31, 24); |
1300 |
| - if (ctx.ret_addr >= dram_hole_base) |
1301 |
| - ctx.ret_addr += (BIT_ULL(32) - dram_hole_base); |
1302 |
| - } |
1303 |
| - |
1304 |
| - if (hash_enabled) { |
1305 |
| - /* Save some parentheses and grab ls-bit at the end. */ |
1306 |
| - hashed_bit = (ctx.ret_addr >> 12) ^ |
1307 |
| - (ctx.ret_addr >> 18) ^ |
1308 |
| - (ctx.ret_addr >> 21) ^ |
1309 |
| - (ctx.ret_addr >> 30) ^ |
1310 |
| - cs_id; |
1311 |
| - |
1312 |
| - hashed_bit &= BIT(0); |
1313 |
| - |
1314 |
| - if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) |
1315 |
| - ctx.ret_addr ^= BIT(intlv_addr_bit); |
1316 |
| - } |
1317 |
| - |
1318 |
| - /* Is calculated system address is above DRAM limit address? */ |
1319 |
| - if (ctx.ret_addr > dram_limit_addr) |
1320 |
| - goto out_err; |
1321 |
| - |
1322 |
| - *sys_addr = ctx.ret_addr; |
1323 |
| - return 0; |
1324 |
| - |
1325 |
| -out_err: |
1326 |
| - return -EINVAL; |
1327 |
| -} |
1328 |
| - |
1329 | 1055 | static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
|
1330 | 1056 |
|
1331 | 1057 | /*
|
@@ -3073,9 +2799,10 @@ static void decode_umc_error(int node_id, struct mce *m)
|
3073 | 2799 | {
|
3074 | 2800 | u8 ecc_type = (m->status >> 45) & 0x3;
|
3075 | 2801 | struct mem_ctl_info *mci;
|
| 2802 | + unsigned long sys_addr; |
3076 | 2803 | struct amd64_pvt *pvt;
|
| 2804 | + struct atl_err a_err; |
3077 | 2805 | struct err_info err;
|
3078 |
| - u64 sys_addr; |
3079 | 2806 |
|
3080 | 2807 | node_id = fixup_node_id(node_id, m);
|
3081 | 2808 |
|
@@ -3106,7 +2833,12 @@ static void decode_umc_error(int node_id, struct mce *m)
|
3106 | 2833 |
|
3107 | 2834 | pvt->ops->get_err_info(m, &err);
|
3108 | 2835 |
|
3109 |
| - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { |
| 2836 | + a_err.addr = m->addr; |
| 2837 | + a_err.ipid = m->ipid; |
| 2838 | + a_err.cpu = m->extcpu; |
| 2839 | + |
| 2840 | + sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); |
| 2841 | + if (IS_ERR_VALUE(sys_addr)) { |
3110 | 2842 | err.err_code = ERR_NORM_ADDR;
|
3111 | 2843 | goto log_error;
|
3112 | 2844 | }
|
|
0 commit comments