Skip to content

Commit 6c9058f

Browse files
yghannam authored and bp3tk0v committed
EDAC/amd64: Use new AMD Address Translation Library
Remove old address translation code and use the new AMD Address Translation Library.

Use "imply" in Kconfig so that the "AMD_ATL" config option takes the value of "EDAC_AMD64" as its default.

Signed-off-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 3f31749 commit 6c9058f

File tree

2 files changed

+10
-277
lines changed

2 files changed

+10
-277
lines changed

drivers/edac/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ config EDAC_GHES
7878
config EDAC_AMD64
7979
tristate "AMD64 (Opteron, Athlon64)"
8080
depends on AMD_NB && EDAC_DECODE_MCE
81+
imply AMD_ATL
8182
help
8283
Support for error detection and correction of DRAM ECC errors on
8384
the AMD64 families (>= K8) of memory controllers.

drivers/edac/amd64_edac.c

Lines changed: 9 additions & 277 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// SPDX-License-Identifier: GPL-2.0-only
2+
#include <linux/ras.h>
23
#include "amd64_edac.h"
34
#include <asm/amd_nb.h>
45

@@ -1051,281 +1052,6 @@ static int fixup_node_id(int node_id, struct mce *m)
10511052
return nid - gpu_node_map.base_node_id + 1;
10521053
}
10531054

1054-
/* Protect the PCI config register pairs used for DF indirect access. */
1055-
static DEFINE_MUTEX(df_indirect_mutex);
1056-
1057-
/*
1058-
* Data Fabric Indirect Access uses FICAA/FICAD.
1059-
*
1060-
* Fabric Indirect Configuration Access Address (FICAA): Constructed based
1061-
* on the device's Instance Id and the PCI function and register offset of
1062-
* the desired register.
1063-
*
1064-
* Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
1065-
* and FICAD HI registers but so far we only need the LO register.
1066-
*
1067-
* Use Instance Id 0xFF to indicate a broadcast read.
1068-
*/
1069-
#define DF_BROADCAST 0xFF
1070-
/*
 * Read one 32-bit Data Fabric register through the FICAA/FICAD pair.
 *
 * @node:        node index; must be below amd_nb_num()
 * @func:        PCI function of the target register (low 3 bits are used)
 * @reg:         register offset (masked with 0x3FC)
 * @instance_id: DF instance to read, or DF_BROADCAST for a broadcast read
 * @lo:          receives the dword read back from config offset 0x98
 *
 * Returns 0 on success, -ENODEV when @node is out of range or has no
 * "link" PCI device, otherwise the error returned by the PCI config
 * accessors.
 */
static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
1071-
{
1072-
struct pci_dev *F4;
1073-
u32 ficaa;
1074-
int err = -ENODEV;
1075-
1076-
if (node >= amd_nb_num())
1077-
goto out;
1078-
1079-
F4 = node_to_amd_nb(node)->link;
1080-
if (!F4)
1081-
goto out;
1082-
/* Build FICAA: bit 0 is set only for a targeted (non-broadcast) read. */
1083-
ficaa = (instance_id == DF_BROADCAST) ? 0 : 1;
1084-
ficaa |= reg & 0x3FC;
1085-
ficaa |= (func & 0x7) << 11;
1086-
ficaa |= instance_id << 16;
1087-
/* The address write and the data read form a pair; keep them under one lock. */
1088-
mutex_lock(&df_indirect_mutex);
1089-
1090-
err = pci_write_config_dword(F4, 0x5C, ficaa);
1091-
if (err) {
1092-
pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
1093-
goto out_unlock;
1094-
}
1095-
1096-
err = pci_read_config_dword(F4, 0x98, lo);
1097-
if (err)
1098-
pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
1099-
1100-
out_unlock:
1101-
mutex_unlock(&df_indirect_mutex);
1102-
1103-
out:
1104-
return err;
1105-
}
1106-
1107-
/* Read a DF register from one specific instance; see __df_indirect_read(). */
static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
1108-
{
1109-
return __df_indirect_read(node, func, reg, instance_id, lo);
1110-
}
1111-
1112-
/* Read a DF register using DF_BROADCAST as the instance id; see __df_indirect_read(). */
static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo)
1113-
{
1114-
return __df_indirect_read(node, func, reg, DF_BROADCAST, lo);
1115-
}
1116-
1117-
/* Working state used by umc_normaddr_to_sysaddr() while translating an address. */
struct addr_ctx {
1118-
/* Address being translated; starts as the normalized address. */
u64 ret_addr;
1119-
/* Scratch value filled by the DF indirect register reads. */
u32 tmp;
1120-
/* Node id passed in by the caller. */
u16 nid;
1121-
/* Instance id; set to the UMC number by the caller. */
u8 inst_id;
1122-
};
1123-
1124-
/*
 * Translate a normalized address reported for a UMC into a system address.
 *
 * @norm_addr: normalized address to translate
 * @nid:       node id used for the DF indirect register reads
 * @umc:       UMC/channel number, also used as the DF instance id
 * @sys_addr:  receives the translated address on success
 *
 * Steps, in order: remove HiAddrOffset when enabled, re-insert the
 * interleave bits built from the coherent slave's fabric id, add the DRAM
 * base address, step over the legacy MMIO hole when enabled, and fix up the
 * interleave address bit when hashing is enabled.
 *
 * Returns 0 on success. Returns -EINVAL when a register read fails, a
 * decoded field is invalid, or the result is above the DRAM limit address.
 */
static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
1125-
{
1126-
u64 dram_base_addr, dram_limit_addr, dram_hole_base;
1127-
1128-
u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
1129-
u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
1130-
u8 intlv_addr_sel, intlv_addr_bit;
1131-
u8 num_intlv_bits, hashed_bit;
1132-
u8 lgcy_mmio_hole_en, base = 0;
1133-
u8 cs_mask, cs_id = 0;
1134-
bool hash_enabled = false;
1135-
1136-
struct addr_ctx ctx;
1137-
1138-
memset(&ctx, 0, sizeof(ctx));
1139-
1140-
/* Start from the normalized address */
1141-
ctx.ret_addr = norm_addr;
1142-
1143-
ctx.nid = nid;
1144-
ctx.inst_id = umc;
1145-
1146-
/* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
1147-
if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp))
1148-
goto out_err;
1149-
1150-
/* Remove HiAddrOffset from normalized address, if enabled: */
1151-
if (ctx.tmp & BIT(0)) {
1152-
u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8;
1153-
1154-
if (norm_addr >= hi_addr_offset) {
1155-
ctx.ret_addr -= hi_addr_offset;
1156-
base = 1;
1157-
}
1158-
}
1159-
1160-
/* Read D18F0x110 (DramBaseAddress). */
1161-
if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp))
1162-
goto out_err;
1163-
1164-
/* Check if address range is valid. */
1165-
if (!(ctx.tmp & BIT(0))) {
1166-
pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
1167-
__func__, ctx.tmp);
1168-
goto out_err;
1169-
}
1170-
1171-
lgcy_mmio_hole_en = ctx.tmp & BIT(1);
1172-
intlv_num_chan = (ctx.tmp >> 4) & 0xF;
1173-
intlv_addr_sel = (ctx.tmp >> 8) & 0x7;
1174-
dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16;
1175-
1176-
/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
1177-
if (intlv_addr_sel > 3) {
1178-
pr_err("%s: Invalid interleave address select %d.\n",
1179-
__func__, intlv_addr_sel);
1180-
goto out_err;
1181-
}
1182-
1183-
/* Read D18F0x114 (DramLimitAddress). */
1184-
if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp))
1185-
goto out_err;
1186-
1187-
intlv_num_sockets = (ctx.tmp >> 8) & 0x1;
1188-
intlv_num_dies = (ctx.tmp >> 10) & 0x3;
1189-
dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
1190-
1191-
intlv_addr_bit = intlv_addr_sel + 8;
1192-
1193-
/* Re-use intlv_num_chan by setting it equal to log2(#channels) */
1194-
switch (intlv_num_chan) {
1195-
case 0: intlv_num_chan = 0; break;
1196-
case 1: intlv_num_chan = 1; break;
1197-
case 3: intlv_num_chan = 2; break;
1198-
case 5: intlv_num_chan = 3; break;
1199-
case 7: intlv_num_chan = 4; break;
1200-
1201-
case 8: intlv_num_chan = 1;
1202-
hash_enabled = true;
1203-
break;
1204-
default:
1205-
pr_err("%s: Invalid number of interleaved channels %d.\n",
1206-
__func__, intlv_num_chan);
1207-
goto out_err;
1208-
}
1209-
1210-
num_intlv_bits = intlv_num_chan;
1211-
1212-
if (intlv_num_dies > 2) {
1213-
pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
1214-
__func__, intlv_num_dies);
1215-
goto out_err;
1216-
}
1217-
1218-
num_intlv_bits += intlv_num_dies;
1219-
1220-
/* Add a bit if sockets are interleaved. */
1221-
num_intlv_bits += intlv_num_sockets;
1222-
1223-
/* Assert num_intlv_bits <= 4 */
1224-
if (num_intlv_bits > 4) {
1225-
pr_err("%s: Invalid interleave bits %d.\n",
1226-
__func__, num_intlv_bits);
1227-
goto out_err;
1228-
}
1229-
1230-
if (num_intlv_bits > 0) {
1231-
u64 temp_addr_x, temp_addr_i, temp_addr_y;
1232-
u8 die_id_bit, sock_id_bit, cs_fabric_id;
1233-
1234-
/*
1235-
* Read FabricBlockInstanceInformation3_CS[BlockFabricID].
1236-
* This is the fabric id for this coherent slave. Use
1237-
* umc/channel# as instance id of the coherent slave
1238-
* for FICAA.
1239-
*/
1240-
if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp))
1241-
goto out_err;
1242-
1243-
cs_fabric_id = (ctx.tmp >> 8) & 0xFF;
1244-
die_id_bit = 0;
1245-
1246-
/* If interleaved over more than 1 channel: */
1247-
if (intlv_num_chan) {
1248-
die_id_bit = intlv_num_chan;
1249-
cs_mask = (1 << die_id_bit) - 1;
1250-
cs_id = cs_fabric_id & cs_mask;
1251-
}
1252-
1253-
sock_id_bit = die_id_bit;
1254-
1255-
/* Read D18F1x208 (SystemFabricIdMask). */
1256-
if (intlv_num_dies || intlv_num_sockets)
1257-
if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp))
1258-
goto out_err;
1259-
1260-
/* If interleaved over more than 1 die. */
1261-
if (intlv_num_dies) {
1262-
sock_id_bit = die_id_bit + intlv_num_dies;
1263-
die_id_shift = (ctx.tmp >> 24) & 0xF;
1264-
die_id_mask = (ctx.tmp >> 8) & 0xFF;
1265-
1266-
cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
1267-
}
1268-
1269-
/* If interleaved over more than 1 socket. */
1270-
if (intlv_num_sockets) {
1271-
socket_id_shift = (ctx.tmp >> 28) & 0xF;
1272-
socket_id_mask = (ctx.tmp >> 16) & 0xFF;
1273-
1274-
cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
1275-
}
1276-
1277-
/*
1278-
* The pre-interleaved address consists of XXXXXXIIIYYYYY
1279-
* where III is the ID for this CS, and XXXXXXYYYYY are the
1280-
* address bits from the post-interleaved address.
1281-
* "num_intlv_bits" has been calculated to tell us how many "I"
1282-
* bits there are. "intlv_addr_bit" tells us how many "Y" bits
1283-
* there are (where "I" starts).
1284-
*/
1285-
temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0);
1286-
temp_addr_i = (cs_id << intlv_addr_bit);
1287-
temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
1288-
ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
1289-
}
1290-
1291-
/* Add dram base address */
1292-
ctx.ret_addr += dram_base_addr;
1293-
1294-
/* If legacy MMIO hole enabled */
1295-
if (lgcy_mmio_hole_en) {
1296-
if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp))
1297-
goto out_err;
1298-
1299-
dram_hole_base = ctx.tmp & GENMASK(31, 24);
1300-
if (ctx.ret_addr >= dram_hole_base)
1301-
ctx.ret_addr += (BIT_ULL(32) - dram_hole_base);
1302-
}
1303-
1304-
if (hash_enabled) {
1305-
/* Save some parentheses and grab ls-bit at the end. */
1306-
hashed_bit = (ctx.ret_addr >> 12) ^
1307-
(ctx.ret_addr >> 18) ^
1308-
(ctx.ret_addr >> 21) ^
1309-
(ctx.ret_addr >> 30) ^
1310-
cs_id;
1311-
1312-
hashed_bit &= BIT(0);
1313-
1314-
if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0)))
1315-
ctx.ret_addr ^= BIT(intlv_addr_bit);
1316-
}
1317-
1318-
/* Is the calculated system address above the DRAM limit address? */
1319-
if (ctx.ret_addr > dram_limit_addr)
1320-
goto out_err;
1321-
1322-
*sys_addr = ctx.ret_addr;
1323-
return 0;
1324-
1325-
out_err:
1326-
return -EINVAL;
1327-
}
1328-
13291055
static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
13301056

13311057
/*
@@ -3073,9 +2799,10 @@ static void decode_umc_error(int node_id, struct mce *m)
30732799
{
30742800
u8 ecc_type = (m->status >> 45) & 0x3;
30752801
struct mem_ctl_info *mci;
2802+
unsigned long sys_addr;
30762803
struct amd64_pvt *pvt;
2804+
struct atl_err a_err;
30772805
struct err_info err;
3078-
u64 sys_addr;
30792806

30802807
node_id = fixup_node_id(node_id, m);
30812808

@@ -3106,7 +2833,12 @@ static void decode_umc_error(int node_id, struct mce *m)
31062833

31072834
pvt->ops->get_err_info(m, &err);
31082835

3109-
if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
2836+
a_err.addr = m->addr;
2837+
a_err.ipid = m->ipid;
2838+
a_err.cpu = m->extcpu;
2839+
2840+
sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
2841+
if (IS_ERR_VALUE(sys_addr)) {
31102842
err.err_code = ERR_NORM_ADDR;
31112843
goto log_error;
31122844
}

0 commit comments

Comments
 (0)