Skip to content

Commit 73814b6

Browse files
committed
[AMDGPU] MCExpr-ify AMDGPU HSAMetadata
1 parent 0bc7ebb commit 73814b6

22 files changed

+346
-440
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -249,14 +249,14 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
249249
getNameWithPrefix(KernelName, &MF->getFunction());
250250
getTargetStreamer()->EmitAmdhsaKernelDescriptor(
251251
STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
252-
getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Context),
253-
getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Context) -
254-
IsaInfo::getNumExtraSGPRs(
255-
&STM, getMCExprValue(CurrentProgramInfo.VCCUsed, Context),
256-
getMCExprValue(CurrentProgramInfo.FlatUsed, Context),
257-
getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
258-
getMCExprValue(CurrentProgramInfo.VCCUsed, Context),
259-
getMCExprValue(CurrentProgramInfo.FlatUsed, Context));
252+
CurrentProgramInfo.NumVGPRsForWavesPerEU,
253+
MCBinaryExpr::createSub(
254+
CurrentProgramInfo.NumSGPRsForWavesPerEU,
255+
AMDGPUVariadicMCExpr::createExtraSGPRs(
256+
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
257+
getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Context),
258+
Context),
259+
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
260260

261261
Streamer.popSection();
262262
}
@@ -401,9 +401,10 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
401401
false);
402402
}
403403

404-
uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
404+
const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
405405
const MachineFunction &MF) const {
406406
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
407+
MCContext &Ctx = MF.getContext();
407408
uint16_t KernelCodeProperties = 0;
408409
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
409410

@@ -436,11 +437,19 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
436437
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
437438
}
438439

439-
if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, MF.getContext()) &&
440-
CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
441-
KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
442-
443-
return KernelCodeProperties;
440+
// CurrentProgramInfo.DynamicCallStack is a MCExpr and could be
441+
// un-evaluatable at this point so it cannot be conditionally checked here.
442+
// Instead, we'll directly shift the possibly unknown MCExpr into its place
443+
// and bitwise-or it into KernelCodeProperties.
444+
const MCExpr *KernelCodePropExpr =
445+
MCConstantExpr::create(KernelCodeProperties, Ctx);
446+
const MCExpr *OrValue = MCConstantExpr::create(
447+
amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
448+
OrValue = MCBinaryExpr::createShl(CurrentProgramInfo.DynamicCallStack,
449+
OrValue, Ctx);
450+
KernelCodePropExpr = MCBinaryExpr::createOr(KernelCodePropExpr, OrValue, Ctx);
451+
452+
return KernelCodePropExpr;
444453
}
445454

446455
MCKernelDescriptor
@@ -463,11 +472,13 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
463472

464473
KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx);
465474
KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(Ctx);
466-
KernelDescriptor.kernel_code_properties =
467-
MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx);
475+
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
468476

469-
assert(STM.hasGFX90AInsts() ||
470-
getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx) == 0);
477+
int64_t PGRM_Rsrc3 = 1;
478+
bool EvaluatableRsrc3 =
479+
CurrentProgramInfo.ComputePGMRSrc3GFX90A->evaluateAsAbsolute(PGRM_Rsrc3);
480+
assert(STM.hasGFX90AInsts() || !EvaluatableRsrc3 ||
481+
static_cast<uint64_t>(PGRM_Rsrc3) == 0);
471482
KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A;
472483

473484
KernelDescriptor.kernarg_preload = MCConstantExpr::create(

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
6969
const SIProgramInfo &CurrentProgramInfo,
7070
bool isModuleEntryFunction, bool hasMAIInsts);
7171

72-
uint16_t getAmdhsaKernelCodeProperties(
73-
const MachineFunction &MF) const;
72+
const MCExpr *getAmdhsaKernelCodeProperties(const MachineFunction &MF) const;
7473

7574
AMDGPU::MCKernelDescriptor
7675
getAmdhsaKernelDescriptor(const MachineFunction &MF,

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -464,28 +464,19 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
464464
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
465465
const Function &F = MF.getFunction();
466466

467-
auto GetMCExprValue = [&MF](const MCExpr *Value) {
468-
int64_t Val;
469-
if (!Value->evaluateAsAbsolute(Val)) {
470-
MCContext &Ctx = MF.getContext();
471-
Ctx.reportError(SMLoc(), "could not resolve expression when required.");
472-
Val = 0;
473-
}
474-
return static_cast<uint64_t>(Val);
475-
};
476-
477467
auto Kern = HSAMetadataDoc->getMapNode();
478468

479469
Align MaxKernArgAlign;
480470
Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
481471
STM.getKernArgSegmentSize(F, MaxKernArgAlign));
482472
Kern[".group_segment_fixed_size"] =
483473
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
484-
Kern[".private_segment_fixed_size"] =
485-
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.ScratchSize));
474+
DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],
475+
msgpack::Type::UInt, ProgramInfo.ScratchSize);
486476
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
487-
Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(
488-
static_cast<bool>(GetMCExprValue(ProgramInfo.DynamicCallStack)));
477+
DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],
478+
msgpack::Type::Boolean,
479+
ProgramInfo.DynamicCallStack);
489480
}
490481

491482
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
@@ -497,15 +488,15 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
497488
Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
498489
Kern[".wavefront_size"] =
499490
Kern.getDocument()->getNode(STM.getWavefrontSize());
500-
Kern[".sgpr_count"] =
501-
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumSGPR));
502-
Kern[".vgpr_count"] =
503-
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumVGPR));
491+
DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt,
492+
ProgramInfo.NumSGPR);
493+
DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt,
494+
ProgramInfo.NumVGPR);
504495

505496
// Only add AGPR count to metadata for supported devices
506497
if (STM.hasMAIInsts()) {
507-
Kern[".agpr_count"] =
508-
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumAccVGPR));
498+
DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt,
499+
ProgramInfo.NumAccVGPR);
509500
}
510501

511502
Kern[".max_flat_workgroup_size"] =
@@ -527,6 +518,7 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
527518
}
528519

529520
bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
521+
DelayedExprs->resolveDelayedExpressions();
530522
return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
531523
}
532524

@@ -536,9 +528,11 @@ void MetadataStreamerMsgPackV4::begin(const Module &Mod,
536528
emitTargetID(TargetID);
537529
emitPrintf(Mod);
538530
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
531+
DelayedExprs->clear();
539532
}
540533

541534
void MetadataStreamerMsgPackV4::end() {
535+
DelayedExprs->resolveDelayedExpressions();
542536
std::string HSAMetadataString;
543537
raw_string_ostream StrOS(HSAMetadataString);
544538
HSAMetadataDoc->toYAML(StrOS);

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
1616
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
1717

18+
#include "Utils/AMDGPUDelayedMCExpr.h"
1819
#include "llvm/BinaryFormat/MsgPackDocument.h"
1920
#include "llvm/Support/AMDGPUMetadata.h"
2021
#include "llvm/Support/Alignment.h"
@@ -65,6 +66,9 @@ class MetadataStreamer {
6566
class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4
6667
: public MetadataStreamer {
6768
protected:
69+
std::unique_ptr<DelayedMCExpr> DelayedExprs =
70+
std::make_unique<DelayedMCExpr>();
71+
6872
std::unique_ptr<msgpack::Document> HSAMetadataDoc =
6973
std::make_unique<msgpack::Document>();
7074

0 commit comments

Comments
 (0)