Skip to content

Commit 6cfb642

Browse files
authored
AMDGPU: Minor updates to program resource registers (#69525)
- Be explicit about which program resource register is supported by which target
  - RSRC1
    - FP16_OVFL is GFX9+
    - WGP_MODE is GFX10+
    - MEM_ORDERED is GFX10+
    - FWD_PROGRESS is GFX10+
  - RSRC3
    - INST_PREF_SIZE is GFX11+
    - TRAP_ON_START is GFX11+
    - TRAP_ON_END is GFX11+
    - IMAGE_OP is GFX11+
- Do not emit GFX11+ fields when disassembling GFX10 code objects
- Tighten enforcement of reserved bits in disassembler

---------

Co-authored-by: Konstantin Zhuravlyov <[email protected]>
1 parent 7b9fb1c commit 6cfb642

File tree

8 files changed

+299
-55
lines changed

8 files changed

+299
-55
lines changed

llvm/include/llvm/Support/AMDHSAKernelDescriptor.h

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,21 @@ enum : uint8_t {
7979
};
8080

8181
// Compute program resource register 1. Must match hardware definition.
82+
// GFX6+.
8283
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
8384
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
85+
// [GFX6-GFX8].
86+
#define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
87+
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
88+
// [GFX6-GFX9].
89+
#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
90+
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
91+
// GFX9+.
92+
#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
93+
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
94+
// GFX10+.
95+
#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
96+
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
8497
enum : int32_t {
8598
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
8699
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
@@ -95,11 +108,13 @@ enum : int32_t {
95108
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
96109
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
97110
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
98-
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
99-
COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
100-
COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
101-
COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
102-
COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
111+
COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
112+
COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
113+
COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
114+
COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
115+
COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
116+
COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
117+
COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
103118
};
104119
#undef COMPUTE_PGM_RSRC1
105120

@@ -143,15 +158,24 @@ enum : int32_t {
143158

144159
// Compute program resource register 3 for GFX10+. Must match hardware
145160
// definition.
161+
// [GFX10].
162+
#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
163+
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
164+
// GFX10+.
146165
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
147166
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
167+
// GFX11+.
168+
#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
169+
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
148170
enum : int32_t {
149-
COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
150-
COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+
151-
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+
152-
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+
153-
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
154-
COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+
171+
COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
172+
COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
173+
COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
174+
COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
175+
COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
176+
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
177+
COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
178+
COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
155179
};
156180
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
157181

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5242,7 +5242,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
52425242
} else if (ID == ".amdhsa_fp16_overflow") {
52435243
if (IVersion.Major < 9)
52445244
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5245-
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5245+
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
52465246
ValRange);
52475247
} else if (ID == ".amdhsa_tg_split") {
52485248
if (!isGFX90A())
@@ -5252,17 +5252,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
52525252
} else if (ID == ".amdhsa_workgroup_processor_mode") {
52535253
if (IVersion.Major < 10)
52545254
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5255-
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5255+
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
52565256
ValRange);
52575257
} else if (ID == ".amdhsa_memory_ordered") {
52585258
if (IVersion.Major < 10)
52595259
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5260-
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5260+
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
52615261
ValRange);
52625262
} else if (ID == ".amdhsa_forward_progress") {
52635263
if (IVersion.Major < 10)
52645264
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5265-
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5265+
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
52665266
ValRange);
52675267
} else if (ID == ".amdhsa_shared_vgpr_count") {
52685268
if (IVersion.Major < 10)

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,16 +1818,23 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
18181818
if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
18191819
return MCDisassembler::Fail;
18201820

1821-
PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
1821+
if (isGFX9Plus())
1822+
PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
18221823

1823-
if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
1824+
if (!isGFX9Plus())
1825+
if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
1826+
return MCDisassembler::Fail;
1827+
if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
18241828
return MCDisassembler::Fail;
1829+
if (!isGFX10Plus())
1830+
if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
1831+
return MCDisassembler::Fail;
18251832

18261833
if (isGFX10Plus()) {
18271834
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1828-
COMPUTE_PGM_RSRC1_WGP_MODE);
1829-
PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
1830-
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
1835+
COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1836+
PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1837+
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
18311838
}
18321839
return MCDisassembler::Success;
18331840
}
@@ -1908,16 +1915,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
19081915
PRINT_PSEUDO_DIRECTIVE_COMMENT(
19091916
"SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
19101917
}
1911-
PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
1912-
COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
1913-
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
1914-
COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
1915-
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
1916-
COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
1917-
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
1918+
1919+
if (isGFX11Plus()) {
1920+
PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
1921+
COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
1922+
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
1923+
COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
1924+
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
1925+
COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
1926+
} else {
1927+
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
1928+
return MCDisassembler::Fail;
1929+
}
1930+
1931+
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
19181932
return MCDisassembler::Fail;
1919-
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
1920-
COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
1933+
1934+
if (isGFX11Plus()) {
1935+
// IMAGE_OP is its own GFX11+ field (bit 31 of COMPUTE_PGM_RSRC3); decode it
// with the IMAGE_OP mask rather than re-printing the TRAP_ON_START bit.
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
1936+
COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
1937+
} else {
1938+
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
1939+
return MCDisassembler::Fail;
1940+
}
19211941
} else if (FourByteBuffer) {
19221942
return MCDisassembler::Fail;
19231943
}

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -453,21 +453,21 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
453453
if (IVersion.Major >= 9)
454454
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
455455
compute_pgm_rsrc1,
456-
amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
456+
amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
457457
if (AMDGPU::isGFX90A(STI))
458458
PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
459459
compute_pgm_rsrc3,
460460
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
461461
if (IVersion.Major >= 10) {
462462
PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
463463
compute_pgm_rsrc1,
464-
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
464+
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
465465
PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
466466
compute_pgm_rsrc1,
467-
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
467+
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
468468
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
469469
compute_pgm_rsrc1,
470-
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
470+
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
471471
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
472472
amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
473473
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,10 +1163,10 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
11631163
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
11641164
STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
11651165
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1166-
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
1166+
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
11671167
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
11681168
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1169-
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
1169+
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
11701170
}
11711171
if (AMDGPU::isGFX90A(*STI)) {
11721172
AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,

llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,6 @@
1212
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
1313
; CHECK-NEXT: .amdhsa_kernarg_size 0
1414
; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
15-
; CHECK-NEXT: ; INST_PREF_SIZE 0
16-
; CHECK-NEXT: ; TRAP_ON_START 0
17-
; CHECK-NEXT: ; TRAP_ON_END 0
18-
; CHECK-NEXT: ; IMAGE_OP 0
1915
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
2016
; CHECK-NEXT: .amdhsa_reserve_vcc 0
2117
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -69,10 +65,6 @@
6965
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
7066
; CHECK-NEXT: .amdhsa_kernarg_size 0
7167
; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
72-
; CHECK-NEXT: ; INST_PREF_SIZE 0
73-
; CHECK-NEXT: ; TRAP_ON_START 0
74-
; CHECK-NEXT: ; TRAP_ON_END 0
75-
; CHECK-NEXT: ; IMAGE_OP 0
7668
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
7769
; CHECK-NEXT: .amdhsa_reserve_vcc 0
7870
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -126,10 +118,6 @@
126118
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
127119
; CHECK-NEXT: .amdhsa_kernarg_size 0
128120
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
129-
; CHECK-NEXT: ; INST_PREF_SIZE 0
130-
; CHECK-NEXT: ; TRAP_ON_START 0
131-
; CHECK-NEXT: ; TRAP_ON_END 0
132-
; CHECK-NEXT: ; IMAGE_OP 0
133121
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
134122
; CHECK-NEXT: .amdhsa_reserve_vcc 0
135123
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -183,10 +171,6 @@
183171
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
184172
; CHECK-NEXT: .amdhsa_kernarg_size 0
185173
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
186-
; CHECK-NEXT: ; INST_PREF_SIZE 0
187-
; CHECK-NEXT: ; TRAP_ON_START 0
188-
; CHECK-NEXT: ; TRAP_ON_END 0
189-
; CHECK-NEXT: ; IMAGE_OP 0
190174
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
191175
; CHECK-NEXT: .amdhsa_reserve_vcc 0
192176
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0

0 commit comments

Comments
 (0)