Skip to content

Commit d6c7253

Browse files
authored
[AMDGPU] Add pal metadata 3.0 support to callable pal funcs (llvm#67104)
PAL Metadata 3.0 introduces an explicit structure in metadata for the programmable registers written out by the compiler backend. The previous approach used opaque registers, which can change between different architectures and required encoding the bitfield information in the backend, which may change between versions. This change is an extension of the previously added support, which only handled entry functions; it adds support for all functions. The change also includes some refactoring to separate common code.
1 parent d4c5aca commit d6c7253

File tree

4 files changed

+347
-22
lines changed

4 files changed

+347
-22
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,27 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
10251025
OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
10261026
}
10271027

1028+
// Helper function to add common PAL Metadata 3.0+
// Writes the hardware-stage fields that are shared by the entry-function path
// (EmitPALMetadata) and the callable-function path (emitPALFunctionMetadata):
//   MD                 - PAL metadata object the fields are written into.
//   CurrentProgramInfo - program info the field values are read from.
//   CC                 - calling convention passed to each setHwStage call.
//   ST                 - subtarget, queried for IEEE-mode support and the LDS
//                        granularity.
// ".wgp_mode" and ".mem_ordered" are always written; ".ieee_mode" only when
// the subtarget reports hasIEEEMode(); ".trap_present", ".excp_en" and
// ".lds_size" only when CC is a compute calling convention.
1029+
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
1030+
const SIProgramInfo &CurrentProgramInfo,
1031+
CallingConv::ID CC, const GCNSubtarget &ST) {
1032+
// Skip the field entirely on subtargets without an IEEE mode.
if (ST.hasIEEEMode())
1033+
MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
1034+
1035+
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
1036+
MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
1037+
1038+
if (AMDGPU::isCompute(CC)) {
1039+
MD->setHwStage(CC, ".trap_present",
1040+
(bool)CurrentProgramInfo.TrapHandlerEnable);
1041+
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1042+
1043+
// Scale LdsSize by the granularity (in dwords) and by the dword size so the
// value is expressed in bytes; presumably LdsSize counts granularity-sized
// blocks -- confirm against SIProgramInfo.
MD->setHwStage(CC, ".lds_size",
1044+
(unsigned)(CurrentProgramInfo.LdsSize *
1045+
getLdsDwGranularity(ST) * sizeof(uint32_t)));
1046+
}
1047+
}
1048+
10281049
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
10291050
// is AMDPAL. It stores each compute/SPI register setting and other PAL
10301051
// metadata items into the PALMD::Metadata, combining with any provided by the
@@ -1056,24 +1077,8 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
10561077
}
10571078
} else {
10581079
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
1059-
MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
1060-
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
1061-
MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
1062-
1063-
if (AMDGPU::isCompute(CC)) {
1064-
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
1065-
MD->setHwStage(CC, ".trap_present",
1066-
(bool)CurrentProgramInfo.TrapHandlerEnable);
1067-
1068-
// EXCPEnMSB?
1069-
const unsigned LdsDwGranularity = 128;
1070-
MD->setHwStage(CC, ".lds_size",
1071-
(unsigned)(CurrentProgramInfo.LdsSize * LdsDwGranularity *
1072-
sizeof(uint32_t)));
1073-
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1074-
} else {
1075-
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
1076-
}
1080+
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
1081+
EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
10771082
}
10781083

10791084
// ScratchSize is in bytes, 16 aligned.
@@ -1127,10 +1132,15 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
11271132
MD->setFunctionScratchSize(FnName, MFI.getStackSize());
11281133
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
11291134

1130-
// Set compute registers
1131-
MD->setRsrc1(CallingConv::AMDGPU_CS,
1132-
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
1133-
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2());
1135+
if (MD->getPALMajorVersion() < 3) {
1136+
// Set compute registers
1137+
MD->setRsrc1(CallingConv::AMDGPU_CS,
1138+
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
1139+
MD->setRsrc2(CallingConv::AMDGPU_CS,
1140+
CurrentProgramInfo.getComputePGMRSrc2());
1141+
} else {
1142+
EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
1143+
}
11341144

11351145
// Set optional info
11361146
MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2958,6 +2958,11 @@ bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
29582958
return hasAny64BitVGPROperands(OpDesc);
29592959
}
29602960

2961+
// Returns the LDS block size in dwords. The subtarget argument is not
// consulted at present because the same constant is returned unconditionally.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
2962+
// Currently this is 128 for all subtargets
2963+
return 128;
2964+
}
2965+
29612966
} // namespace AMDGPU
29622967

29632968
raw_ostream &operator<<(raw_ostream &OS,

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,6 +1439,11 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID);
14391439
/// \returns true if the intrinsic is uniform
14401440
bool isIntrinsicAlwaysUniform(unsigned IntrID);
14411441

1442+
/// \returns lds block size in terms of dwords.
1443+
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
1444+
/// must be defined in terms of bytes.
1445+
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1446+
14421447
} // end namespace AMDGPU
14431448

14441449
raw_ostream &operator<<(raw_ostream &OS,

0 commit comments

Comments
 (0)