Skip to content

Commit 453f0ae

Browse files
Muralidhara M K authored and bp3tk0v committed
RAS/AMD/ATL: Add MI300 support
AMD MI300 systems include on-die HBM3 memory and a unique topology. And they fall under Data Fabric version 4.5 in overall design. Generally, topology information (IDs, etc.) is gathered from Data Fabric registers. However, the unique topology for MI300 means that some topology information is fixed in hardware and follows arbitrary mappings. Furthermore, not all hardware instances are software-visible, so register accesses must be adjusted. Recognize and add helper functions for the new MI300 interleave modes. Add lookup tables for fixed values where appropriate. Adjust how Die and Node IDs are found and used. Also, fix some register bitmasks that were mislabeled. Signed-off-by: Muralidhara M K <[email protected]> Co-developed-by: Yazen Ghannam <[email protected]> Signed-off-by: Yazen Ghannam <[email protected]> Signed-off-by: Borislav Petkov (AMD) <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 1289c43 commit 453f0ae

File tree

8 files changed

+309
-5
lines changed

8 files changed

+309
-5
lines changed

drivers/ras/amd/atl/access.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,40 @@ static DEFINE_MUTEX(df_indirect_mutex);
3636

3737
#define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2)
3838

39+
/*
 * Translate an AMD Node ID into the Node ID that must be used for
 * register accesses.
 *
 * On heterogeneous systems, not every AMD Node is reachable through
 * software-visible registers, so the ID is adjusted here. The adjusted
 * value is meant for register accesses only; the translation methods
 * keep using the unmodified Node ID.
 *
 * Returns the input unchanged on non-heterogeneous systems and on
 * heterogeneous systems of any revision other than DF3.5 or DF4.5.
 */
static u16 get_accessible_node(u16 node)
{
	if (!df_cfg.flags.heterogeneous)
		return node;

	switch (df_cfg.rev) {
	case DF3p5:
		/* Only Node 0 is accessible on DF3.5 systems. */
		return 0;

	case DF4p5:
		/*
		 * Only the first Node in each Socket is accessible on
		 * DF4.5 systems, and software sees this as one Fabric per
		 * Socket. Derive the Socket ID from the Node ID using the
		 * global shift values.
		 */
		return node >> (df_cfg.socket_id_shift - df_cfg.node_id_shift);

	default:
		return node;
	}
}
64+
3965
static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
4066
{
4167
u32 ficaa_addr = 0x8C, ficad_addr = 0xB8;
4268
struct pci_dev *F4;
4369
int err = -ENODEV;
4470
u32 ficaa = 0;
4571

72+
node = get_accessible_node(node);
4673
if (node >= amd_nb_num())
4774
goto out;
4875

drivers/ras/amd/atl/dehash.c

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
253253
hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
254254
hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
255255
hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
256-
hash_ctl_1T = FIELD_GET(DF4_HASH_CTL_1T, ctx->map.ctl);
256+
hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
257257

258258
/*
259259
* Generate a unique address to determine which bits
@@ -343,6 +343,94 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
343343
return 0;
344344
}
345345

346+
/*
347+
* MI300 hash bits
348+
* 4K 64K 2M 1G 1T 1T
349+
* COH_ST_Select[0] = XOR of addr{8, 12, 15, 22, 29, 36, 43}
350+
* COH_ST_Select[1] = XOR of addr{9, 13, 16, 23, 30, 37, 44}
351+
* COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45}
352+
* COH_ST_Select[3] = XOR of addr{11, 18, 25, 32, 39, 46}
353+
* COH_ST_Select[4] = XOR of addr{14, 19, 26, 33, 40, 47} aka Stack
354+
* DieID[0] = XOR of addr{12, 20, 27, 34, 41 }
355+
* DieID[1] = XOR of addr{13, 21, 28, 35, 42 }
356+
*/
357+
static int mi300_dehash_addr(struct addr_ctx *ctx)
358+
{
359+
bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
360+
bool hashed_bit, intlv_bit, test_bit;
361+
u8 num_intlv_bits, base_bit, i;
362+
363+
if (!map_bits_valid(ctx, 8, 8, 4, 1))
364+
return -EINVAL;
365+
366+
hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl);
367+
hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
368+
hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
369+
hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
370+
hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
371+
372+
/* Channel bits */
373+
num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
374+
375+
for (i = 0; i < num_intlv_bits; i++) {
376+
base_bit = 8 + i;
377+
378+
/* COH_ST_Select[4] jumps to a base bit of 14. */
379+
if (i == 4)
380+
base_bit = 14;
381+
382+
intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr;
383+
384+
hashed_bit = intlv_bit;
385+
386+
/* 4k hash bit only applies to the first 3 bits. */
387+
if (i <= 2) {
388+
test_bit = BIT_ULL(12 + i) & ctx->ret_addr;
389+
hashed_bit ^= test_bit & hash_ctl_4k;
390+
}
391+
392+
/* Use temporary 'test_bit' value to avoid Sparse warnings. */
393+
test_bit = BIT_ULL(15 + i) & ctx->ret_addr;
394+
hashed_bit ^= test_bit & hash_ctl_64k;
395+
test_bit = BIT_ULL(22 + i) & ctx->ret_addr;
396+
hashed_bit ^= test_bit & hash_ctl_2M;
397+
test_bit = BIT_ULL(29 + i) & ctx->ret_addr;
398+
hashed_bit ^= test_bit & hash_ctl_1G;
399+
test_bit = BIT_ULL(36 + i) & ctx->ret_addr;
400+
hashed_bit ^= test_bit & hash_ctl_1T;
401+
test_bit = BIT_ULL(43 + i) & ctx->ret_addr;
402+
hashed_bit ^= test_bit & hash_ctl_1T;
403+
404+
if (hashed_bit != intlv_bit)
405+
ctx->ret_addr ^= BIT_ULL(base_bit);
406+
}
407+
408+
/* Die bits */
409+
num_intlv_bits = ilog2(ctx->map.num_intlv_dies);
410+
411+
for (i = 0; i < num_intlv_bits; i++) {
412+
base_bit = 12 + i;
413+
414+
intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr;
415+
416+
hashed_bit = intlv_bit;
417+
418+
test_bit = BIT_ULL(20 + i) & ctx->ret_addr;
419+
hashed_bit ^= test_bit & hash_ctl_64k;
420+
test_bit = BIT_ULL(27 + i) & ctx->ret_addr;
421+
hashed_bit ^= test_bit & hash_ctl_2M;
422+
test_bit = BIT_ULL(34 + i) & ctx->ret_addr;
423+
hashed_bit ^= test_bit & hash_ctl_1G;
424+
test_bit = BIT_ULL(41 + i) & ctx->ret_addr;
425+
hashed_bit ^= test_bit & hash_ctl_1T;
426+
427+
if (hashed_bit != intlv_bit)
428+
ctx->ret_addr ^= BIT_ULL(base_bit);
429+
}
430+
431+
return 0;
432+
}
433+
346434
int dehash_address(struct addr_ctx *ctx)
347435
{
348436
switch (ctx->map.intlv_mode) {
@@ -400,6 +488,11 @@ int dehash_address(struct addr_ctx *ctx)
400488
case DF4p5_NPS1_16CHAN_2K_HASH:
401489
return df4p5_dehash_addr(ctx);
402490

491+
case MI3_HASH_8CHAN:
492+
case MI3_HASH_16CHAN:
493+
case MI3_HASH_32CHAN:
494+
return mi300_dehash_addr(ctx);
495+
403496
default:
404497
atl_debug_on_bad_intlv_mode(ctx);
405498
return -EINVAL;

drivers/ras/amd/atl/denormalize.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,40 @@ static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx)
8080
return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr);
8181
}
8282

83+
/*
84+
* Make space for CS ID at bits [14:8] as follows:
85+
*
86+
* 8 channels -> bits [10:8]
87+
* 16 channels -> bits [11:8]
88+
* 32 channels -> bits [14,11:8]
89+
*
90+
* 1 die -> N/A
91+
* 2 dies -> bit [12]
92+
* 4 dies -> bits [13:12]
93+
*/
94+
static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx)
95+
{
96+
u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
97+
u64 denorm_addr;
98+
99+
if (ctx->map.intlv_bit_pos != 8) {
100+
pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
101+
return ~0ULL;
102+
}
103+
104+
/* Channel bits. Covers up to 4 bits at [11:8]. */
105+
denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr);
106+
107+
/* Die bits. Always starts at [12]. */
108+
denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr);
109+
110+
/* Additional channel bit at [14]. */
111+
if (num_intlv_bits > 4)
112+
denorm_addr = expand_bits(14, 1, denorm_addr);
113+
114+
return denorm_addr;
115+
}
116+
83117
/*
84118
* Take the current calculated address and shift enough bits in the middle
85119
* to make a gap where the interleave bits will be inserted.
@@ -107,6 +141,12 @@ static u64 make_space_for_coh_st_id(struct addr_ctx *ctx)
107141
case DF4p5_NPS1_8CHAN_2K_HASH:
108142
case DF4p5_NPS1_16CHAN_2K_HASH:
109143
return make_space_for_coh_st_id_split_2_1(ctx);
144+
145+
case MI3_HASH_8CHAN:
146+
case MI3_HASH_16CHAN:
147+
case MI3_HASH_32CHAN:
148+
return make_space_for_coh_st_id_mi300(ctx);
149+
110150
default:
111151
atl_debug_on_bad_intlv_mode(ctx);
112152
return ~0ULL;
@@ -204,6 +244,32 @@ static u16 get_coh_st_id_df4(struct addr_ctx *ctx)
204244
return coh_st_id;
205245
}
206246

247+
/*
248+
* MI300 hash has:
249+
* (C)hannel[3:0] = coh_st_id[3:0]
250+
* (S)tack[0] = coh_st_id[4]
251+
* (D)ie[1:0] = coh_st_id[6:5]
252+
*
253+
* Hashed coh_st_id is swizzled so that Stack bit is at the end.
254+
* coh_st_id = SDDCCCC
255+
*/
256+
static u16 get_coh_st_id_mi300(struct addr_ctx *ctx)
257+
{
258+
u8 channel_bits, die_bits, stack_bit;
259+
u16 die_id;
260+
261+
/* Subtract the "base" Destination Fabric ID. */
262+
ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx);
263+
264+
die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift;
265+
266+
channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id);
267+
stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6;
268+
die_bits = die_id << 4;
269+
270+
return stack_bit | die_bits | channel_bits;
271+
}
272+
207273
/*
208274
* Derive the correct Coherent Station ID that represents the interleave bits
209275
* used within the system physical address. This accounts for the
@@ -237,6 +303,11 @@ static u16 calculate_coh_st_id(struct addr_ctx *ctx)
237303
case DF4p5_NPS1_16CHAN_2K_HASH:
238304
return get_coh_st_id_df4(ctx);
239305

306+
case MI3_HASH_8CHAN:
307+
case MI3_HASH_16CHAN:
308+
case MI3_HASH_32CHAN:
309+
return get_coh_st_id_mi300(ctx);
310+
240311
/* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */
241312
case DF4p5_NPS2_4CHAN_1K_HASH:
242313
case DF4p5_NPS1_8CHAN_1K_HASH:
@@ -287,6 +358,9 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id
287358
case NOHASH_8CHAN:
288359
case NOHASH_16CHAN:
289360
case NOHASH_32CHAN:
361+
case MI3_HASH_8CHAN:
362+
case MI3_HASH_16CHAN:
363+
case MI3_HASH_32CHAN:
290364
case DF2_2CHAN_HASH:
291365
return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id);
292366

@@ -314,13 +388,41 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id
314388
}
315389
}
316390

391+
/*
392+
* MI300 systems have a fixed, hardware-defined physical-to-logical
393+
* Coherent Station mapping. The Remap registers are not used.
394+
*/
395+
static const u16 phy_to_log_coh_st_map_mi300[] = {
396+
12, 13, 14, 15,
397+
8, 9, 10, 11,
398+
4, 5, 6, 7,
399+
0, 1, 2, 3,
400+
28, 29, 30, 31,
401+
24, 25, 26, 27,
402+
20, 21, 22, 23,
403+
16, 17, 18, 19,
404+
};
405+
406+
static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx)
407+
{
408+
if (ctx->inst_id >= sizeof(phy_to_log_coh_st_map_mi300)) {
409+
atl_debug(ctx, "Instance ID out of range");
410+
return ~0;
411+
}
412+
413+
return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift);
414+
}
415+
317416
static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
318417
{
319418
u16 component_id, log_fabric_id;
320419

321420
/* Start with the physical COH_ST Fabric ID. */
322421
u16 phys_fabric_id = ctx->coh_st_fabric_id;
323422

423+
if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
424+
return get_logical_coh_st_fabric_id_mi300(ctx);
425+
324426
/* Skip logical ID lookup if remapping is disabled. */
325427
if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) &&
326428
ctx->map.intlv_mode != DF3_6CHAN)

drivers/ras/amd/atl/internal.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@
2727
/* PCI ID for Zen4 Server DF Function 0. */
2828
#define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022
2929

30+
/* PCI ID for MI300 DF Function 0. */
31+
#define DF_FUNC0_ID_MI300 0x15281022
32+
3033
/* Shift needed for adjusting register values to true values. */
3134
#define DF_DRAM_BASE_LIMIT_LSB 28
35+
#define MI300_DRAM_LIMIT_LSB 20
3236

3337
enum df_revisions {
3438
UNKNOWN,
@@ -59,6 +63,9 @@ enum intlv_modes {
5963
DF4_NPS1_12CHAN_HASH = 0x15,
6064
DF4_NPS2_5CHAN_HASH = 0x16,
6165
DF4_NPS1_10CHAN_HASH = 0x17,
66+
MI3_HASH_8CHAN = 0x18,
67+
MI3_HASH_16CHAN = 0x19,
68+
MI3_HASH_32CHAN = 0x1A,
6269
DF2_2CHAN_HASH = 0x21,
6370
/* DF4.5 modes are all IntLvNumChan + 0x20 */
6471
DF4p5_NPS1_16CHAN_1K_HASH = 0x2C,
@@ -86,7 +93,8 @@ enum intlv_modes {
8693
struct df_flags {
8794
__u8 legacy_ficaa : 1,
8895
socket_id_shift_quirk : 1,
89-
__reserved_0 : 6;
96+
heterogeneous : 1,
97+
__reserved_0 : 5;
9098
};
9199

92100
struct df_config {

drivers/ras/amd/atl/map.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ static int df4p5_get_intlv_mode(struct addr_ctx *ctx)
6363
if (ctx->map.intlv_mode <= NOHASH_32CHAN)
6464
return 0;
6565

66+
if (ctx->map.intlv_mode >= MI3_HASH_8CHAN &&
67+
ctx->map.intlv_mode <= MI3_HASH_32CHAN)
68+
return 0;
69+
6670
/*
6771
* Modes matching the ranges above are returned as-is.
6872
*
@@ -125,6 +129,9 @@ static u64 get_hi_addr_offset(u32 reg_dram_offset)
125129
atl_debug_on_bad_df_rev();
126130
}
127131

132+
if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
133+
shift = MI300_DRAM_LIMIT_LSB;
134+
128135
return hi_addr_offset << shift;
129136
}
130137

@@ -369,6 +376,13 @@ static int get_coh_st_fabric_id(struct addr_ctx *ctx)
369376
{
370377
u32 reg;
371378

379+
/*
380+
* On MI300 systems, the Coherent Station Fabric ID is derived
381+
* later. And it does not depend on the register value.
382+
*/
383+
if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
384+
return 0;
385+
372386
/* Read D18F0x50 (FabricBlockInstanceInformation3). */
373387
if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, &reg))
374388
return -EINVAL;
@@ -490,6 +504,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx)
490504
case NOHASH_8CHAN:
491505
case DF3_COD1_8CHAN_HASH:
492506
case DF4_NPS1_8CHAN_HASH:
507+
case MI3_HASH_8CHAN:
493508
case DF4p5_NPS1_8CHAN_1K_HASH:
494509
case DF4p5_NPS1_8CHAN_2K_HASH:
495510
return 8;
@@ -502,13 +517,15 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx)
502517
case DF4p5_NPS1_12CHAN_2K_HASH:
503518
return 12;
504519
case NOHASH_16CHAN:
520+
case MI3_HASH_16CHAN:
505521
case DF4p5_NPS1_16CHAN_1K_HASH:
506522
case DF4p5_NPS1_16CHAN_2K_HASH:
507523
return 16;
508524
case DF4p5_NPS0_24CHAN_1K_HASH:
509525
case DF4p5_NPS0_24CHAN_2K_HASH:
510526
return 24;
511527
case NOHASH_32CHAN:
528+
case MI3_HASH_32CHAN:
512529
return 32;
513530
default:
514531
atl_debug_on_bad_intlv_mode(ctx);

0 commit comments

Comments
 (0)