Skip to content

Commit 8d4977a

Browse files
authored
[AMDGPU] Update hardware registers for GFX12 (#74445)
1 parent ecd2f56 commit 8d4977a

File tree

4 files changed: 144 additions (+144) and 18 deletions (-18).

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -478,6 +478,9 @@ enum Id { // HwRegCode, (6) [5:0]
478478
ID_GPR_ALLOC = 5,
479479
ID_LDS_ALLOC = 6,
480480
ID_IB_STS = 7,
481+
ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
482+
ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
483+
ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
481484
ID_MEM_BASES = 15,
482485
ID_TBA_LO = 16,
483486
ID_TBA_HI = 17,
@@ -489,12 +492,23 @@ enum Id { // HwRegCode, (6) [5:0]
489492
ID_HW_ID1 = 23,
490493
ID_HW_ID2 = 24,
491494
ID_POPS_PACKER = 25,
492-
ID_PERF_SNAPSHOT_DATA = 27,
495+
ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
493496
ID_SHADER_CYCLES = 29,
494-
495-
// Register numbers reused in GFX11+
496-
ID_PERF_SNAPSHOT_PC_LO = 18,
497-
ID_PERF_SNAPSHOT_PC_HI = 19,
497+
ID_SHADER_CYCLES_HI = 30,
498+
ID_DVGPR_ALLOC_LO = 31,
499+
ID_DVGPR_ALLOC_HI = 32,
500+
501+
// Register numbers reused in GFX11
502+
ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
503+
ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,
504+
505+
// Register numbers reused in GFX12+
506+
ID_STATE_PRIV = 4,
507+
ID_PERF_SNAPSHOT_DATA1 = 15,
508+
ID_PERF_SNAPSHOT_DATA2 = 16,
509+
ID_EXCP_FLAG_PRIV = 17,
510+
ID_EXCP_FLAG_USER = 18,
511+
ID_TRAP_CTRL = 19,
498512

499513
// GFX940 specific registers
500514
ID_XCC_ID = 20,

llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -87,41 +87,56 @@ namespace Hwreg {
8787

8888
// Disable lint checking for this block since it makes the table unreadable.
8989
// NOLINTBEGIN
90+
// clang-format off
9091
const CustomOperand<const MCSubtargetInfo &> Opr[] = {
9192
{{""}},
9293
{{"HW_REG_MODE"}, ID_MODE},
9394
{{"HW_REG_STATUS"}, ID_STATUS},
94-
{{"HW_REG_TRAPSTS"}, ID_TRAPSTS},
95+
{{"HW_REG_TRAPSTS"}, ID_TRAPSTS, isNotGFX12Plus},
9596
{{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus},
9697
{{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC},
9798
{{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC},
9899
{{"HW_REG_IB_STS"}, ID_IB_STS},
99100
{{""}},
100101
{{""}},
102+
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
103+
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
104+
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
101105
{{""}},
102106
{{""}},
103-
{{""}},
104-
{{""}},
105-
{{""}},
106-
{{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9Plus},
107+
{{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9_GFX10_GFX11},
107108
{{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10},
108109
{{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10},
109110
{{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10},
110111
{{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10},
111-
{{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10Plus},
112-
{{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10Plus},
112+
{{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10_GFX11},
113+
{{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10_GFX11},
113114
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030},
114115
{{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus},
115116
{{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus},
116117
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
117118
{{""}},
118-
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA, isGFX11Plus},
119+
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
119120
{{""}},
120-
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding},
121-
122-
// Register numbers reused in GFX11+
123-
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO, isGFX11Plus},
124-
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI, isGFX11Plus},
121+
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11},
122+
{{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
123+
{{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
124+
{{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
125+
126+
// Register numbers reused in GFX11
127+
{{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
128+
{{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
129+
130+
// Register numbers reused in GFX12+
131+
{{"HW_REG_STATE_PRIV"}, ID_STATE_PRIV, isGFX12Plus},
132+
{{"HW_REG_PERF_SNAPSHOT_DATA1"}, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
133+
{{"HW_REG_PERF_SNAPSHOT_DATA2"}, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
134+
{{"HW_REG_EXCP_FLAG_PRIV"}, ID_EXCP_FLAG_PRIV, isGFX12Plus},
135+
{{"HW_REG_EXCP_FLAG_USER"}, ID_EXCP_FLAG_USER, isGFX12Plus},
136+
{{"HW_REG_TRAP_CTRL"}, ID_TRAP_CTRL, isGFX12Plus},
137+
{{"HW_REG_SCRATCH_BASE_LO"}, ID_FLAT_SCR_LO, isGFX12Plus},
138+
{{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
139+
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
125140

126141
// GFX940 specific registers
127142
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
@@ -133,6 +148,7 @@ const CustomOperand<const MCSubtargetInfo &> Opr[] = {
133148
// Aliases
134149
{{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
135150
};
151+
// clang-format on
136152
// NOLINTEND
137153

138154
const int OPR_SIZE = static_cast<int>(

llvm/test/MC/AMDGPU/gfx12_asm_sopk.s

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -183,6 +183,9 @@ s_getreg_b32 s0, hwreg(HW_REG_MODE)
183183
s_getreg_b32 s0, hwreg(HW_REG_STATUS)
184184
// GFX12: encoding: [0x02,0xf8,0x80,0xb8]
185185

186+
s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV)
187+
// GFX12: encoding: [0x04,0xf8,0x80,0xb8]
188+
186189
s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC)
187190
// GFX12: encoding: [0x05,0xf8,0x80,0xb8]
188191

@@ -192,8 +195,50 @@ s_getreg_b32 s0, hwreg(HW_REG_LDS_ALLOC)
192195
s_getreg_b32 s0, hwreg(HW_REG_IB_STS)
193196
// GFX12: encoding: [0x07,0xf8,0x80,0xb8]
194197

198+
s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA)
199+
// GFX12: encoding: [0x0a,0xf8,0x80,0xb8]
200+
201+
s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO)
202+
// GFX12: encoding: [0x0b,0xf8,0x80,0xb8]
203+
204+
s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI)
205+
// GFX12: encoding: [0x0c,0xf8,0x80,0xb8]
206+
207+
s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1)
208+
// GFX12: encoding: [0x0f,0xf8,0x80,0xb8]
209+
210+
s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2)
211+
// GFX12: encoding: [0x10,0xf8,0x80,0xb8]
212+
213+
s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV)
214+
// GFX12: encoding: [0x11,0xf8,0x80,0xb8]
215+
216+
s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER)
217+
// GFX12: encoding: [0x12,0xf8,0x80,0xb8]
218+
219+
s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL)
220+
// GFX12: encoding: [0x13,0xf8,0x80,0xb8]
221+
222+
s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO)
223+
// GFX12: encoding: [0x14,0xf8,0x80,0xb8]
224+
225+
s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI)
226+
// GFX12: encoding: [0x15,0xf8,0x80,0xb8]
227+
195228
s_getreg_b32 s0, hwreg(HW_REG_HW_ID1)
196229
// GFX12: encoding: [0x17,0xf8,0x80,0xb8]
197230

198231
s_getreg_b32 s0, hwreg(HW_REG_HW_ID2)
199232
// GFX12: encoding: [0x18,0xf8,0x80,0xb8]
233+
234+
s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO)
235+
// GFX12: encoding: [0x1f,0xf8,0x80,0xb8]
236+
237+
s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI)
238+
// GFX12: encoding: [0x20,0xf8,0x80,0xb8]
239+
240+
s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO)
241+
// GFX12: encoding: [0x1d,0xf8,0x80,0xb8]
242+
243+
s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI)
244+
// GFX12: encoding: [0x1e,0xf8,0x80,0xb8]

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopk.txt

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,9 @@
7676
# GFX12: s_getreg_b32 s0, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0x80,0xb8]
7777
0x34,0x12,0x80,0xb8
7878

79+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25) ; encoding: [0xd1,0xc1,0x80,0xb8]
80+
0xd1,0xc1,0x80,0xb8
81+
7982
# GFX12: s_getreg_b32 s105, hwreg(52, 8, 3) ; encoding: [0x34,0x12,0xe9,0xb8]
8083
0x34,0x12,0xe9,0xb8
8184

@@ -154,6 +157,9 @@
154157
# GFX12: s_setreg_b32 hwreg(52, 8, 3), vcc_lo ; encoding: [0x34,0x12,0x6a,0xb9]
155158
0x34,0x12,0x6a,0xb9
156159

160+
# GFX12: s_setreg_b32 hwreg(HW_REG_EXCP_FLAG_PRIV, 7, 25), s0 ; encoding: [0xd1,0xc1,0x00,0xb9]
161+
0xd1,0xc1,0x00,0xb9
162+
157163
# GFX12: s_version 0x1234 ; encoding: [0x34,0x12,0x80,0xb0]
158164
0x34,0x12,0x80,0xb0
159165

@@ -190,6 +196,9 @@
190196
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATUS) ; encoding: [0x02,0xf8,0x80,0xb8]
191197
0x02,0xf8,0x80,0xb8
192198

199+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_STATE_PRIV) ; encoding: [0x04,0xf8,0x80,0xb8]
200+
0x04,0xf8,0x80,0xb8
201+
193202
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC) ; encoding: [0x05,0xf8,0x80,0xb8]
194203
0x05,0xf8,0x80,0xb8
195204

@@ -199,8 +208,50 @@
199208
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_IB_STS) ; encoding: [0x07,0xf8,0x80,0xb8]
200209
0x07,0xf8,0x80,0xb8
201210

211+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA) ; encoding: [0x0a,0xf8,0x80,0xb8]
212+
0x0a,0xf8,0x80,0xb8
213+
214+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_LO) ; encoding: [0x0b,0xf8,0x80,0xb8]
215+
0x0b,0xf8,0x80,0xb8
216+
217+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_PC_HI) ; encoding: [0x0c,0xf8,0x80,0xb8]
218+
0x0c,0xf8,0x80,0xb8
219+
220+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA1) ; encoding: [0x0f,0xf8,0x80,0xb8]
221+
0x0f,0xf8,0x80,0xb8
222+
223+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_PERF_SNAPSHOT_DATA2) ; encoding: [0x10,0xf8,0x80,0xb8]
224+
0x10,0xf8,0x80,0xb8
225+
226+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_PRIV) ; encoding: [0x11,0xf8,0x80,0xb8]
227+
0x11,0xf8,0x80,0xb8
228+
229+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_EXCP_FLAG_USER) ; encoding: [0x12,0xf8,0x80,0xb8]
230+
0x12,0xf8,0x80,0xb8
231+
232+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_TRAP_CTRL) ; encoding: [0x13,0xf8,0x80,0xb8]
233+
0x13,0xf8,0x80,0xb8
234+
235+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_LO) ; encoding: [0x14,0xf8,0x80,0xb8]
236+
0x14,0xf8,0x80,0xb8
237+
238+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SCRATCH_BASE_HI) ; encoding: [0x15,0xf8,0x80,0xb8]
239+
0x15,0xf8,0x80,0xb8
240+
202241
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID1) ; encoding: [0x17,0xf8,0x80,0xb8]
203242
0x17,0xf8,0x80,0xb8
204243

205244
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_HW_ID2) ; encoding: [0x18,0xf8,0x80,0xb8]
206245
0x18,0xf8,0x80,0xb8
246+
247+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_LO) ; encoding: [0x1f,0xf8,0x80,0xb8]
248+
0x1f,0xf8,0x80,0xb8
249+
250+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_DVGPR_ALLOC_HI) ; encoding: [0x20,0xf8,0x80,0xb8]
251+
0x20,0xf8,0x80,0xb8
252+
253+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_LO) ; encoding: [0x1d,0xf8,0x80,0xb8]
254+
0x1d,0xf8,0x80,0xb8
255+
256+
# GFX12: s_getreg_b32 s0, hwreg(HW_REG_SHADER_CYCLES_HI) ; encoding: [0x1e,0xf8,0x80,0xb8]
257+
0x1e,0xf8,0x80,0xb8

0 commit comments

Comments
 (0)