-
Notifications
You must be signed in to change notification settings - Fork 14.3k
MCExpr-ify AMDGPU PALMetadata #93236
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
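@llvm/pr-subscribers-backend-amdgpu Author: Janek van Oirschot (JanekvO) Changes: Allows MCExprs as passed values to PALMetadata. Also adds related `DelayedMCExpr` classes which serve as a pseudo-fixup to resolve MCExprs as late as possible (i.e., right before emit through string or blob, where they should be resolvable). Patch is 35.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93236.diff 13 Files Affected: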
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index cad4a3430327b..f4028adc84828 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1194,6 +1194,30 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
getLdsDwGranularity(ST) * sizeof(uint32_t)));
}
+/// Derive the (shift, mask) pair of a register field from its clear-mask.
+///
+/// \p Value is a clear-mask: a word in which the bits belonging to the field
+/// are zero (the caller in this file passes C_00B84C_SCRATCH_EN). The returned
+/// shift is the number of trailing zero bits of ~Value, and the returned mask
+/// is ~Value shifted right by that amount.
+///
+/// If \p Value has no zero bit at all there is no field to describe; {0, 0} is
+/// returned in that case rather than looping forever on a zero mask.
+static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
+  unsigned Shift = 0;
+  unsigned Mask = ~Value;
+
+  // Shifting zero to the right never exposes a set bit, so bail out early.
+  if (Mask == 0)
+    return std::make_pair(0u, 0u);
+
+  for (; !(Mask & 1); Shift++, Mask >>= 1) {
+  }
+
+  return std::make_pair(Shift, Mask);
+}
+
+/// Build the expression `(Val & Mask) << Shift` in \p Ctx.
+///
+/// The AND is left out when \p Mask is zero and the shift is left out when
+/// \p Shift is zero, so \p Val is returned unchanged when both are zero.
+static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
+                                  uint32_t Shift, MCContext &Ctx) {
+  const MCExpr *Result = Val;
+  if (Mask != 0)
+    Result = MCBinaryExpr::createAnd(Result, MCConstantExpr::create(Mask, Ctx),
+                                     Ctx);
+  if (Shift != 0)
+    Result = MCBinaryExpr::createShl(Result, MCConstantExpr::create(Shift, Ctx),
+                                     Ctx);
+  return Result;
+}
+
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
// metadata items into the PALMD::Metadata, combining with any provided by the
@@ -1207,41 +1231,49 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
auto &Ctx = MF.getContext();
MD->setEntryPoint(CC, MF.getFunction().getName());
- MD->setNumUsedVgprs(
- CC, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
+ MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
// Only set AGPRs for supported devices
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
if (STM.hasMAIInsts()) {
- MD->setNumUsedAgprs(CC, getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx));
+ MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
}
- MD->setNumUsedSgprs(
- CC, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
+ MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
if (MD->getPALMajorVersion() < 3) {
- MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM));
+ MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
if (AMDGPU::isCompute(CC)) {
- MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2());
+ MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
} else {
- if (getMCExprValue(CurrentProgramInfo.ScratchBlocks, Ctx) > 0)
- MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
+ const MCExpr *HasScratchBlocks =
+ MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,
+ MCConstantExpr::create(0, Ctx), Ctx);
+ auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);
+ MD->setRsrc2(CC, MaskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
}
} else {
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
- MD->setHwStage(CC, ".scratch_en",
- (bool)getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx));
+ MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
+ CurrentProgramInfo.ScratchEnable);
EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
}
// ScratchSize is in bytes, 16 aligned.
MD->setScratchSize(
- CC, alignTo(getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), 16));
+ CC,
+ AMDGPUVariadicMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize,
+ MCConstantExpr::create(16, Ctx), Ctx),
+ Ctx);
+
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
: CurrentProgramInfo.LDSBlocks;
if (MD->getPALMajorVersion() < 3) {
- MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
+ MD->setRsrc2(
+ CC,
+ MCConstantExpr::create(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize), Ctx),
+ Ctx);
MD->setSpiPsInputEna(MFI->getPSInputEnable());
MD->setSpiPsInputAddr(MFI->getPSInputAddr());
} else {
@@ -1288,20 +1320,19 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
if (MD->getPALMajorVersion() < 3) {
// Set compute registers
- MD->setRsrc1(CallingConv::AMDGPU_CS,
- CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
+ MD->setRsrc1(
+ CallingConv::AMDGPU_CS,
+ CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx);
MD->setRsrc2(CallingConv::AMDGPU_CS,
- CurrentProgramInfo.getComputePGMRSrc2());
+ CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
} else {
EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
}
// Set optional info
MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
- MD->setFunctionNumUsedVgprs(
- FnName, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
- MD->setFunctionNumUsedSgprs(
- FnName, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
+ MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+ MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
}
// This is supposed to be log2(Size)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp
new file mode 100644
index 0000000000000..3955f557b9a25
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp
@@ -0,0 +1,64 @@
+//===- AMDGPUDelayedMCExpr.cpp - Delayed MCExpr resolve ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUDelayedMCExpr.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+/// Create a node of kind \p Type holding the constant part of \p Val, in the
+/// document that owns \p DN. Int, UInt and Boolean are handled; every other
+/// kind yields an empty node.
+static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type,
+                                MCValue Val) {
+  msgpack::Document *Owner = DN.getDocument();
+  if (Type == msgpack::Type::Int)
+    return Owner->getNode(static_cast<int64_t>(Val.getConstant()));
+  if (Type == msgpack::Type::UInt)
+    return Owner->getNode(static_cast<uint64_t>(Val.getConstant()));
+  if (Type == msgpack::Type::Boolean)
+    return Owner->getNode(static_cast<bool>(Val.getConstant()));
+  return Owner->getEmptyNode();
+}
+
+/// Write the value of \p Expr into \p DN as a node of kind \p Type. When the
+/// expression already evaluates to an absolute value the node is assigned
+/// right away; otherwise the request is appended to the queue that
+/// ResolveDelayedExpressions() works through.
+void DelayedMCExpr::AssignDocNode(msgpack::DocNode &DN, msgpack::Type Type,
+                                  const MCExpr *Expr) {
+  MCValue Res;
+  const bool Evaluated = Expr->evaluateAsRelocatable(Res, nullptr, nullptr);
+  if (!Evaluated || !Res.isAbsolute()) {
+    DelayedExprs.push_back(DelayedExpr{DN, Type, Expr});
+    return;
+  }
+
+  DN = getNode(DN, Type, Res);
+}
+
+/// Work through the queue front to back, writing each evaluated expression
+/// into its document node and removing it from the queue. Returns false at the
+/// first entry that does not evaluate to an absolute value, leaving that entry
+/// and everything behind it queued; returns true once the queue is empty.
+bool DelayedMCExpr::ResolveDelayedExpressions() {
+  while (!DelayedExprs.empty()) {
+    DelayedExpr Pending = DelayedExprs.front();
+    MCValue Res;
+
+    bool Resolved =
+        Pending.Expr->evaluateAsRelocatable(Res, nullptr, nullptr);
+    Resolved &= Res.isAbsolute();
+    if (!Resolved)
+      return false;
+
+    DelayedExprs.pop_front();
+    // DN is a reference member, so this assigns through to the document node.
+    Pending.DN = getNode(Pending.DN, Pending.Type, Res);
+  }
+
+  return true;
+}
+
+/// Discard every queued expression without resolving it.
+void DelayedMCExpr::clear() {
+  DelayedExprs.clear();
+}
+
+/// Return true when no expression is waiting to be resolved.
+bool DelayedMCExpr::empty() {
+  return DelayedExprs.empty();
+}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h
new file mode 100644
index 0000000000000..c546660a0d996
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h
@@ -0,0 +1,39 @@
+//===- AMDGPUDelayedMCExpr.h - Delayed MCExpr resolve -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include <deque>
+
+namespace llvm {
+class MCExpr;
+
+/// Queue of MCExprs whose values have to be written into msgpack document
+/// nodes but could not be evaluated at the time they were handed over.
+class DelayedMCExpr {
+  /// One pending write: the destination node, the kind of node to create and
+  /// the expression that supplies the value. The node is held by reference.
+  struct DelayedExpr {
+    msgpack::DocNode &DN;
+    msgpack::Type Type;
+    const MCExpr *Expr;
+    DelayedExpr(msgpack::DocNode &Node, msgpack::Type Kind, const MCExpr *E)
+        : DN(Node), Type(Kind), Expr(E) {}
+  };
+
+  std::deque<DelayedExpr> DelayedExprs;
+
+public:
+  /// Resolve queued expressions in order; false if one could not be resolved.
+  bool ResolveDelayedExpressions();
+  /// Assign \p Expr to \p DN now if it is absolute, otherwise queue it.
+  void AssignDocNode(msgpack::DocNode &DN, msgpack::Type Type,
+                     const MCExpr *Expr);
+  /// Drop all queued expressions.
+  void clear();
+  /// True when nothing is queued.
+  bool empty();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 0fa67c559cb29..4597dab142470 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -20,6 +20,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/EndianStream.h"
@@ -137,12 +138,22 @@ void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, unsigned Val) {
setRegister(getRsrc1Reg(CC), Val);
}
+// MCExpr variant: set the rsrc1 register for the shader stage selected by CC
+// by forwarding the expression to setRegister.
+void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, const MCExpr *Val,
+                                 MCContext &Ctx) {
+  const auto Rsrc1Reg = getRsrc1Reg(CC);
+  setRegister(Rsrc1Reg, Val, Ctx);
+}
+
// Set the rsrc2 register in the metadata for a particular shader stage.
// In fact this ORs the value into any previous setting of the register.
void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, unsigned Val) {
setRegister(getRsrc1Reg(CC) + 1, Val);
}
+// MCExpr variant: set the rsrc2 register (the register directly after rsrc1)
+// for the shader stage selected by CC by forwarding to setRegister.
+void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, const MCExpr *Val,
+                                 MCContext &Ctx) {
+  const auto Rsrc2Reg = getRsrc1Reg(CC) + 1;
+  setRegister(Rsrc2Reg, Val, Ctx);
+}
+
// Set the SPI_PS_INPUT_ENA register in the metadata.
// In fact this ORs the value into any previous setting of the register.
void AMDGPUPALMetadata::setSpiPsInputEna(unsigned Val) {
@@ -182,6 +193,40 @@ void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) {
N = N.getDocument()->getNode(Val);
}
+// Set a register in the metadata from an MCExpr.
+// In fact this ORs the value into any previous setting of the register,
+// whether that setting is an integer already stored in the document or an
+// expression recorded in REM by an earlier call.
+void AMDGPUPALMetadata::setRegister(unsigned Reg, const MCExpr *Val,
+                                    MCContext &Ctx) {
+  if (!isLegacy()) {
+    // In the new MsgPack format, ignore register numbered >= 0x10000000. It
+    // is a PAL ABI pseudo-register in the old non-MsgPack format.
+    if (Reg >= 0x10000000)
+      return;
+  }
+  auto &N = getRegisters()[MsgPackDoc.getNode(Reg)];
+  // Look the register up once and reuse the iterator below.
+  auto It = REM.find(Reg);
+  const bool RegSeenInREM = It != REM.end();
+
+  // OR in the expression recorded by an earlier call for this register.
+  if (RegSeenInREM)
+    Val = MCBinaryExpr::createOr(Val, It->second, Ctx);
+
+  // OR in an integer that is already stored in the document. This can be the
+  // case even for a register tracked in REM, because
+  // setRegister(unsigned, unsigned) may have been called in between.
+  if (N.getKind() == msgpack::Type::UInt) {
+    const MCExpr *NExpr = MCConstantExpr::create(N.getUInt(), Ctx);
+    Val = MCBinaryExpr::createOr(Val, NExpr, Ctx);
+  }
+
+  // Remember the combined expression whenever it cannot be folded into the
+  // document right now, including the first time a register is set with an
+  // unresolved expression. Otherwise a later call would not OR it in and the
+  // delayed resolution would overwrite that later value.
+  if (RegSeenInREM) {
+    It->second = Val;
+  } else {
+    int64_t Unused;
+    if (!Val->evaluateAsAbsolute(Unused))
+      REM[Reg] = Val;
+    (void)Unused;
+  }
+  DelayedExprs.AssignDocNode(N, msgpack::Type::UInt, Val);
+}
+
// Set the entry point name for one shader.
void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
if (isLegacy())
@@ -207,11 +252,29 @@ void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
}
+// MCExpr variant: set the number of used vgprs in the metadata.
+void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, const MCExpr *Val,
+                                        MCContext &Ctx) {
+  if (!isLegacy()) {
+    // Msgpack format.
+    setHwStage(CC, ".vgpr_count", msgpack::Type::UInt, Val);
+    return;
+  }
+  // Old non-msgpack format: the key is derived from the scratch size key of
+  // the same stage.
+  unsigned VgprCountKey = getScratchSizeKey(CC) +
+                          PALMD::Key::VS_NUM_USED_VGPRS -
+                          PALMD::Key::VS_SCRATCH_SIZE;
+  setRegister(VgprCountKey, Val, Ctx);
+}
+
// Set the number of used agprs in the metadata.
void AMDGPUPALMetadata::setNumUsedAgprs(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".agpr_count"] = Val;
}
+// MCExpr variant: set the number of used agprs in the metadata by storing the
+// expression as the unsigned ".agpr_count" field of the hardware stage.
+void AMDGPUPALMetadata::setNumUsedAgprs(unsigned CC, const MCExpr *Val) {
+  const msgpack::Type CountKind = msgpack::Type::UInt;
+  setHwStage(CC, ".agpr_count", CountKind, Val);
+}
+
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
@@ -228,6 +291,20 @@ void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val);
}
+// MCExpr variant: set the number of used sgprs in the metadata.
+void AMDGPUPALMetadata::setNumUsedSgprs(unsigned CC, const MCExpr *Val,
+                                        MCContext &Ctx) {
+  if (!isLegacy()) {
+    // Msgpack format.
+    setHwStage(CC, ".sgpr_count", msgpack::Type::UInt, Val);
+    return;
+  }
+  // Old non-msgpack format: the key is derived from the scratch size key of
+  // the same stage.
+  unsigned SgprCountKey = getScratchSizeKey(CC) +
+                          PALMD::Key::VS_NUM_USED_SGPRS -
+                          PALMD::Key::VS_SCRATCH_SIZE;
+  setRegister(SgprCountKey, Val, Ctx);
+}
+
// Set the scratch size in the metadata.
void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
if (isLegacy()) {
@@ -239,6 +316,17 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
+// MCExpr variant: set the scratch size in the metadata.
+void AMDGPUPALMetadata::setScratchSize(unsigned CC, const MCExpr *Val,
+                                       MCContext &Ctx) {
+  if (!isLegacy()) {
+    // Msgpack format.
+    setHwStage(CC, ".scratch_memory_size", msgpack::Type::UInt, Val);
+    return;
+  }
+  // Old non-msgpack format.
+  setRegister(getScratchSizeKey(CC), Val, Ctx);
+}
+
// Set the stack frame size of a function in the metadata.
void AMDGPUPALMetadata::setFunctionScratchSize(StringRef FnName, unsigned Val) {
auto Node = getShaderFunction(FnName);
@@ -259,6 +347,12 @@ void AMDGPUPALMetadata::setFunctionNumUsedVgprs(StringRef FnName,
Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
}
+// MCExpr variant: set the ".vgpr_count" entry of the named shader function,
+// resolving the expression later if it is not yet absolute.
+void AMDGPUPALMetadata::setFunctionNumUsedVgprs(StringRef FnName,
+                                                const MCExpr *Val) {
+  auto FnNode = getShaderFunction(FnName);
+  auto &CountNode = FnNode[".vgpr_count"];
+  DelayedExprs.AssignDocNode(CountNode, msgpack::Type::UInt, Val);
+}
+
// Set the number of used vgprs in the metadata.
void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName,
unsigned Val) {
@@ -266,6 +360,12 @@ void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName,
Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
}
+// MCExpr variant: set the ".sgpr_count" entry of the named shader function,
+// resolving the expression later if it is not yet absolute.
+void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName,
+                                                const MCExpr *Val) {
+  auto FnNode = getShaderFunction(FnName);
+  auto &CountNode = FnNode[".sgpr_count"];
+  DelayedExprs.AssignDocNode(CountNode, msgpack::Type::UInt, Val);
+}
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {
@@ -662,6 +762,7 @@ void AMDGPUPALMetadata::toString(std::string &String) {
String.clear();
if (!BlobType)
return;
+ ResolvedAll = DelayedExprs.ResolveDelayedExpressions();
raw_string_ostream Stream(String);
if (isLegacy()) {
if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil)
@@ -711,6 +812,7 @@ void AMDGPUPALMetadata::toString(std::string &String) {
// a .note record of the specified AMD type. Returns an empty blob if
// there is no PAL metadata,
void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
+ ResolvedAll = DelayedExprs.ResolveDelayedExpressions();
if (Type == ELF::NT_AMD_PAL_METADATA)
toLegacyBlob(Blob);
else if (Type)
@@ -906,11 +1008,17 @@ void AMDGPUPALMetadata::setLegacy() {
// Erase all PAL metadata.
void AMDGPUPALMetadata::reset() {
MsgPackDoc.clear();
+ REM.clear();
+ DelayedExprs.clear();
Registers = MsgPackDoc.getEmptyNode();
HwStages = MsgPackDoc.getEmptyNode();
ShaderFunctions = MsgPackDoc.getEmptyNode();
}
+// Report whether every MCExpr given to this metadata has been resolved: the
+// most recent resolution attempt (made by toString/toBlob) succeeded and no
+// expression is left in the delayed queue.
+bool AMDGPUPALMetadata::resolvedAllMCExpr() {
+  if (!ResolvedAll)
+    return false;
+  return DelayedExprs.empty();
+}
+
unsigned AMDGPUPALMetadata::getPALVersion(unsigned idx) {
assert(idx < 2 &&
"illegal index to PAL version - should be 0 (major) or 1 (minor)");
@@ -942,6 +1050,11 @@ void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, bool Val) {
getHwStage(CC)[field] = Val;
}
+// MCExpr variant: set a hardware stage field as a node of the given kind,
+// resolving the expression later if it is not yet absolute.
+void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field,
+                                   msgpack::Type Type, const MCExpr *Val) {
+  auto &FieldNode = getHwStage(CC)[field];
+  DelayedExprs.AssignDocNode(FieldNode, Type, Val);
+}
+
void AMDGPUPALMetadata::setComputeRegisters(StringRef field, unsigned Val) {
getComputeRegisters()[field] = Val;
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 158f766d04854..1dcdd4b985142 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -13,7 +13,10 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+#include "AMDGPUDelayedMCExpr.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/MC/MCContext.h"
namespace llvm {
@@ -21,6 +24,10 @@ class Module;
class StringRef;
class AMDGPUPALMetadata {
+public:
+ using RegisterExprMap = DenseMap<unsigned, const MCExpr *>;
+
+private:
unsigned BlobType = 0;
msgpack::Document MsgPackDoc;
msgpack::DocNode Registers;
@@ -32,6 +39,10 @@ class AMDGPUPALMetadata {
msgpack::DocNode ComputeRegisters;
msgpack::DocNode GraphicsRegisters;
+ DelayedMCExpr DelayedExprs;
+ RegisterExprMap REM;
+ bool ResolvedAll = true;
+
public:
// Read the amdgpu.pal.metadata supplied by the frontend, ready for
// per-function modification.
@@ -45,10 +56,12 @@ class AMDGPUPALMetadata {
// Set the rsrc1 register in the metadata for a particular shader stage.
// In fact this ORs the value into any previous setting of the register.
void setRsrc1(unsigned CC, unsigned Val);
+ void setRsrc1(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the rsrc2 register in the metadata for a particular shader stage.
// In fact this ORs the value into any previous setting of the register.
void setRsrc2(unsigned CC, unsigned Val);
+ void setRsrc2(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the SPI_PS_INPUT_ENA register in the metadata.
// In fact this ORs the value into any previous setting of the register.
@@ -64,6 +77,7 @@ class AMDGPUPALMetadata {
// Set a register in the metadata.
// In fact this ORs the value into any previous setting of the register.
void setRegister(unsigned Reg, unsigned Val);
+ void setRegister(unsigned Reg, const MCExpr *Val, MCContext &Ctx);
// Set the entry point name for one shader.
void setEntryPoint(unsigned CC, StringRef Name);
@@ -72,18 +86,22 @@ class AMDGPUPALMetadata {
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of vgprs to allocate.
void setNumUsedVgprs(unsigned CC, unsigned Val);
+ void setNumUsedVgprs(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the number of used agprs in the metadata. This is an optional a...
[truncated]
|
for (; !(Mask & 1); Shift++, Mask >>= 1) { | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this maskTrailingZeroes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not quite; I should've specified that it is specifically for the C_* defines in SIDefines.h. I have now moved this helper function, together with the MCExpr set and get in the same file, to a utils header. I also combined the already-used equivalents in AMDKernelCodeTUtils.cpp into said utils header.
…header, add comments with explanations
… remove redundant inline
bool RegSeenInREM = REM.find(Reg) != REM.end(); | ||
|
||
if (RegSeenInREM) { | ||
Val = MCBinaryExpr::createOr(Val, REM[Reg], Ctx); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You do the lookup in REM, throw away the iterator, and then do the lookup again several times in the function. Just do the lookup once?
Allows MCExprs as passed values to PALMetadata. Also adds related `DelayedMCExpr` classes which serve as a pseudo-fixup to resolve MCExprs as late as possible (i.e., right before emit through string or blob, where they should be resolvable).
Allows MCExprs as passed values to PALMetadata. Also adds related `DelayedMCExpr` classes which serve as a pseudo-fixup to resolve MCExprs as late as possible (i.e., right before emit through string or blob, where they should be resolvable).