-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Reland [AMDGPU] MCExpr-ify MC layer kernel descriptor #86494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…hsa symbolic expression tests, apply feedback
@llvm/pr-subscribers-mc Author: Janek van Oirschot (JanekvO). Changes: Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Required for moving function/program resource usage information propagation to MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use. Relands #80855 with fixes. Patch is 172.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86494.diff 19 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 72e8b59e0a4096..052b231d62a3eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,6 +22,7 @@
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
#include "SIMachineFunctionInfo.h"
@@ -428,38 +429,43 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
return KernelCodeProperties;
}
-amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const {
+MCKernelDescriptor
+AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ MCContext &Ctx = MF.getContext();
- amdhsa::kernel_descriptor_t KernelDescriptor;
- memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
+ MCKernelDescriptor KernelDescriptor;
assert(isUInt<32>(PI.ScratchSize));
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));
- KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
- KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.group_segment_fixed_size =
+ MCConstantExpr::create(PI.LDSSize, Ctx);
+ KernelDescriptor.private_segment_fixed_size =
+ MCConstantExpr::create(PI.ScratchSize, Ctx);
Align MaxKernArgAlign;
- KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ KernelDescriptor.kernarg_size = MCConstantExpr::create(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
- KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
- KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
- KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+ KernelDescriptor.compute_pgm_rsrc1 =
+ MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx);
+ KernelDescriptor.compute_pgm_rsrc2 =
+ MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx);
+ KernelDescriptor.kernel_code_properties =
+ MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx);
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
- if (STM.hasGFX90AInsts())
- KernelDescriptor.compute_pgm_rsrc3 =
- CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+ STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx);
- if (AMDGPU::hasKernargPreload(STM))
- KernelDescriptor.kernarg_preload =
- static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+ KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+ AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+ Ctx);
return KernelDescriptor;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 79326cd3d3289f..b8b2718d293e69 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -28,15 +28,12 @@ class MCCodeEmitter;
class MCOperand;
namespace AMDGPU {
+struct MCKernelDescriptor;
namespace HSAMD {
class MetadataStreamer;
}
} // namespace AMDGPU
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
unsigned CodeObjectVersion;
@@ -75,9 +72,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint16_t getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const;
- amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const;
+ AMDGPU::MCKernelDescriptor
+ getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const;
void initTargetStreamer(Module &M);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 529705479646fc..72154afb2c010e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
@@ -5417,7 +5418,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+ AMDGPU::MCKernelDescriptor KD =
+ AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
+ &getSTI(), getContext());
StringSet<> Seen;
@@ -5457,89 +5460,111 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError(".amdhsa_ directives cannot be repeated");
SMLoc ValStart = getLoc();
- int64_t IVal;
- if (getParser().parseAbsoluteExpression(IVal))
+ const MCExpr *ExprVal;
+ if (getParser().parseExpression(ExprVal))
return true;
SMLoc ValEnd = getLoc();
SMRange ValRange = SMRange(ValStart, ValEnd);
- if (IVal < 0)
- return OutOfRangeError(ValRange);
-
+ int64_t IVal = 0;
uint64_t Val = IVal;
+ bool EvaluatableExpr;
+ if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+ Val = IVal;
+ }
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
- if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ if (!isUInt<ENTRY##_WIDTH>(Val)) \
return OutOfRangeError(RANGE); \
- AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+ AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
+ getContext());
+
+// Some fields use the parsed value immediately which requires the expression to
+// be solvable.
+#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
+ if (!(RESOLVED)) \
+ return Error(IDRange.Start, "directive should have resolvable expression", \
+ IDRange);
if (ID == ".amdhsa_group_segment_fixed_size") {
- if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
+ CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.group_segment_fixed_size = Val;
+ KD.group_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_private_segment_fixed_size") {
- if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
+ CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.private_segment_fixed_size = Val;
+ KD.private_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_kernarg_size") {
- if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.kernarg_size = Val;
+ KD.kernarg_size = ExprVal;
} else if (ID == ".amdhsa_user_sgpr_count") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val > getMaxNumUserSGPRs())
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
ValRange);
if (Val) {
ImpliedUserSGPRCount += Val;
PreloadLength = Val;
}
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val >= 1024)
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
ValRange);
if (Val)
PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
@@ -5548,34 +5573,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
- ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- Val, ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_uses_dynamic_stack") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_enable_private_segment") {
if (!hasArchitectedFlatScratch())
return Error(
@@ -5583,42 +5613,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"directive is not supported without architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_vgpr_workitem_id") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
VGPRRange = ValRange;
NextFreeVGPR = Val;
} else if (ID == ".amdhsa_next_free_sgpr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
SGPRRange = ValRange;
NextFreeSGPR = Val;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
@@ -5638,97 +5674,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_round_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
- ValRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PL...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Janek van Oirschot (JanekvO). Changes: Kernel descriptor attributes, with their respective emit and asm parse functionality, converted to MCExpr. Required for moving function/program resource usage information propagation to MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use. Relands #80855 with fixes. Patch is 172.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86494.diff 19 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 72e8b59e0a4096..052b231d62a3eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -22,6 +22,7 @@
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
#include "SIMachineFunctionInfo.h"
@@ -428,38 +429,43 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
return KernelCodeProperties;
}
-amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const {
+MCKernelDescriptor
+AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ MCContext &Ctx = MF.getContext();
- amdhsa::kernel_descriptor_t KernelDescriptor;
- memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
+ MCKernelDescriptor KernelDescriptor;
assert(isUInt<32>(PI.ScratchSize));
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
assert(isUInt<32>(PI.getComputePGMRSrc2()));
- KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
- KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.group_segment_fixed_size =
+ MCConstantExpr::create(PI.LDSSize, Ctx);
+ KernelDescriptor.private_segment_fixed_size =
+ MCConstantExpr::create(PI.ScratchSize, Ctx);
Align MaxKernArgAlign;
- KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ KernelDescriptor.kernarg_size = MCConstantExpr::create(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
- KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM);
- KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
- KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+ KernelDescriptor.compute_pgm_rsrc1 =
+ MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx);
+ KernelDescriptor.compute_pgm_rsrc2 =
+ MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx);
+ KernelDescriptor.kernel_code_properties =
+ MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx);
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
- if (STM.hasGFX90AInsts())
- KernelDescriptor.compute_pgm_rsrc3 =
- CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
+ STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx);
- if (AMDGPU::hasKernargPreload(STM))
- KernelDescriptor.kernarg_preload =
- static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+ KernelDescriptor.kernarg_preload = MCConstantExpr::create(
+ AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
+ Ctx);
return KernelDescriptor;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 79326cd3d3289f..b8b2718d293e69 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -28,15 +28,12 @@ class MCCodeEmitter;
class MCOperand;
namespace AMDGPU {
+struct MCKernelDescriptor;
namespace HSAMD {
class MetadataStreamer;
}
} // namespace AMDGPU
-namespace amdhsa {
-struct kernel_descriptor_t;
-}
-
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
unsigned CodeObjectVersion;
@@ -75,9 +72,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint16_t getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const;
- amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor(
- const MachineFunction &MF,
- const SIProgramInfo &PI) const;
+ AMDGPU::MCKernelDescriptor
+ getAmdhsaKernelDescriptor(const MachineFunction &MF,
+ const SIProgramInfo &PI) const;
void initTargetStreamer(Module &M);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 529705479646fc..72154afb2c010e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
@@ -5417,7 +5418,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
+ AMDGPU::MCKernelDescriptor KD =
+ AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
+ &getSTI(), getContext());
StringSet<> Seen;
@@ -5457,89 +5460,111 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError(".amdhsa_ directives cannot be repeated");
SMLoc ValStart = getLoc();
- int64_t IVal;
- if (getParser().parseAbsoluteExpression(IVal))
+ const MCExpr *ExprVal;
+ if (getParser().parseExpression(ExprVal))
return true;
SMLoc ValEnd = getLoc();
SMRange ValRange = SMRange(ValStart, ValEnd);
- if (IVal < 0)
- return OutOfRangeError(ValRange);
-
+ int64_t IVal = 0;
uint64_t Val = IVal;
+ bool EvaluatableExpr;
+ if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
+ if (IVal < 0)
+ return OutOfRangeError(ValRange);
+ Val = IVal;
+ }
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
- if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
+ if (!isUInt<ENTRY##_WIDTH>(Val)) \
return OutOfRangeError(RANGE); \
- AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+ AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
+ getContext());
+
+// Some fields use the parsed value immediately which requires the expression to
+// be solvable.
+#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
+ if (!(RESOLVED)) \
+ return Error(IDRange.Start, "directive should have resolvable expression", \
+ IDRange);
if (ID == ".amdhsa_group_segment_fixed_size") {
- if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
+ CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.group_segment_fixed_size = Val;
+ KD.group_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_private_segment_fixed_size") {
- if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
+ CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.private_segment_fixed_size = Val;
+ KD.private_segment_fixed_size = ExprVal;
} else if (ID == ".amdhsa_kernarg_size") {
- if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
+ if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
- KD.kernarg_size = Val;
+ KD.kernarg_size = ExprVal;
} else if (ID == ".amdhsa_user_sgpr_count") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val > getMaxNumUserSGPRs())
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
ValRange);
if (Val) {
ImpliedUserSGPRCount += Val;
PreloadLength = Val;
}
} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!hasKernargPreload())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
if (Val >= 1024)
return OutOfRangeError(ValRange);
- PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
+ PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
ValRange);
if (Val)
PreloadOffset = Val;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
@@ -5548,34 +5573,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
- ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
- Val, ValRange);
+ ExprVal, ValRange);
if (Val)
ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
EnableWavefrontSize32 = Val;
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
- Val, ValRange);
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_uses_dynamic_stack") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
- KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_enable_private_segment") {
if (!hasArchitectedFlatScratch())
return Error(
@@ -5583,42 +5613,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
"directive is not supported without architected flat scratch",
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
+ COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
ValRange);
} else if (ID == ".amdhsa_system_vgpr_workitem_id") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
- COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+ COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
VGPRRange = ValRange;
NextFreeVGPR = Val;
} else if (ID == ".amdhsa_next_free_sgpr") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
SGPRRange = ValRange;
NextFreeSGPR = Val;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
AccumOffset = Val;
} else if (ID == ".amdhsa_reserve_vcc") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (!isUInt<1>(Val))
return OutOfRangeError(ValRange);
ReserveVCC = Val;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
+ EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
@@ -5638,97 +5674,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IDRange);
} else if (ID == ".amdhsa_float_round_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_round_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_32") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
+ ValRange);
} else if (ID == ".amdhsa_float_denorm_mode_16_64") {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+ COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
ValRange);
} else if (ID == ".amdhsa_dx10_clamp") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
ValRange);
} else if (ID == ".amdhsa_ieee_mode") {
if (IVersion.Major >= 12)
return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
- COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
+ COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_fp16_overflow") {
if (IVersion.Major < 9)
return Error(IDRange.Start, "directive requires gfx9+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
ValRange);
} else if (ID == ".amdhsa_tg_split") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
- ValRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
+ ExprVal, ValRange);
} else if (ID == ".amdhsa_workgroup_processor_mode") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
ValRange);
} else if (ID == ".amdhsa_memory_ordered") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+ COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
ValRange);
} else if (ID == ".amdhsa_forward_progress") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
- PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PL...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
✅ With the latest revision this PR passed the Python code formatter. |
Kernel descriptor attributes, along with their respective emit and asm parse functionality, are converted to MCExpr. This is required for moving function/program resource usage information propagation to the MC layer. As a result of this change, some amdhsa directives in assembly can use asm symbols that are defined later than their use.
Relands #80855 with fixes.