Skip to content

AMDGPU: Minor updates to program resource registers #69525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 35 additions & 11 deletions llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,21 @@ enum : uint8_t {
};

// Compute program resource register 1. Must match hardware definition.
// GFX6+.
#define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX8].
#define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
// [GFX6-GFX9].
#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
// GFX9+.
#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX10+.
#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
Expand All @@ -95,11 +108,13 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
};
#undef COMPUTE_PGM_RSRC1

Expand Down Expand Up @@ -143,15 +158,24 @@ enum : int32_t {

// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
// [GFX10].
#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
// GFX10+.
#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
// GFX11+.
#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+
COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+
COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
};
#undef COMPUTE_PGM_RSRC3_GFX10_PLUS

Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5242,7 +5242,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
Expand All @@ -5252,17 +5252,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
ValRange);
} else if (ID == ".amdhsa_shared_vgpr_count") {
if (IVersion.Major < 10)
Expand Down
48 changes: 34 additions & 14 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1818,16 +1818,23 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
return MCDisassembler::Fail;

PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
if (isGFX9Plus())
PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
if (!isGFX9Plus())
if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
return MCDisassembler::Fail;
if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
return MCDisassembler::Fail;
if (!isGFX10Plus())
if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
return MCDisassembler::Fail;

if (isGFX10Plus()) {
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
COMPUTE_PGM_RSRC1_WGP_MODE);
PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
}
return MCDisassembler::Success;
}
Expand Down Expand Up @@ -1908,16 +1915,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
PRINT_PSEUDO_DIRECTIVE_COMMENT(
"SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)

if (isGFX11Plus()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
} else {
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
return MCDisassembler::Fail;
}

if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
return MCDisassembler::Fail;
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);

if (isGFX11Plus()) {
PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
} else {
if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
return MCDisassembler::Fail;
}
} else if (FourByteBuffer) {
return MCDisassembler::Fail;
}
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,21 +453,21 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 9)
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
if (AMDGPU::isGFX90A(STI))
PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
if (IVersion.Major >= 10) {
PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,10 +1163,10 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
}
if (AMDGPU::isGFX90A(*STI)) {
AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
Expand Down
16 changes: 0 additions & 16 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
Expand Down Expand Up @@ -69,10 +65,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
Expand Down Expand Up @@ -126,10 +118,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
Expand Down Expand Up @@ -183,10 +171,6 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
; CHECK-NEXT: ; INST_PREF_SIZE 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
; CHECK-NEXT: .amdhsa_reserve_vcc 0
; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
Expand Down
Loading