Skip to content

Commit c29b265

Browse files
committed
Reapply "[AMDGPU] Add pal metadata 3.0 support to callable pal funcs (#67104)"
This reverts commit 7d508eb.
1 parent a05910a commit c29b265

File tree

4 files changed

+347
-22
lines changed

4 files changed

+347
-22
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,27 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
10331033
OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
10341034
}
10351035

1036+
// Helper function to set the hardware-stage entries common to PAL metadata 3.0+
1037+
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
1038+
const SIProgramInfo &CurrentProgramInfo,
1039+
CallingConv::ID CC, const GCNSubtarget &ST) {
1040+
if (ST.hasIEEEMode())
1041+
MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
1042+
1043+
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
1044+
MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
1045+
1046+
if (AMDGPU::isCompute(CC)) {
1047+
MD->setHwStage(CC, ".trap_present",
1048+
(bool)CurrentProgramInfo.TrapHandlerEnable);
1049+
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1050+
1051+
MD->setHwStage(CC, ".lds_size",
1052+
(unsigned)(CurrentProgramInfo.LdsSize *
1053+
getLdsDwGranularity(ST) * sizeof(uint32_t)));
1054+
}
1055+
}
1056+
10361057
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
10371058
// is AMDPAL. It stores each compute/SPI register setting and other PAL
10381059
// metadata items into the PALMD::Metadata, combining with any provided by the
@@ -1064,24 +1085,8 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
10641085
}
10651086
} else {
10661087
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
1067-
MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
1068-
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
1069-
MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
1070-
1071-
if (AMDGPU::isCompute(CC)) {
1072-
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
1073-
MD->setHwStage(CC, ".trap_present",
1074-
(bool)CurrentProgramInfo.TrapHandlerEnable);
1075-
1076-
// EXCPEnMSB?
1077-
const unsigned LdsDwGranularity = 128;
1078-
MD->setHwStage(CC, ".lds_size",
1079-
(unsigned)(CurrentProgramInfo.LdsSize * LdsDwGranularity *
1080-
sizeof(uint32_t)));
1081-
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1082-
} else {
1083-
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
1084-
}
1088+
MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
1089+
EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
10851090
}
10861091

10871092
// ScratchSize is in bytes, 16 aligned.
@@ -1135,10 +1140,15 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
11351140
MD->setFunctionScratchSize(FnName, MFI.getStackSize());
11361141
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
11371142

1138-
// Set compute registers
1139-
MD->setRsrc1(CallingConv::AMDGPU_CS,
1140-
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
1141-
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2());
1143+
if (MD->getPALMajorVersion() < 3) {
1144+
// Set compute registers
1145+
MD->setRsrc1(CallingConv::AMDGPU_CS,
1146+
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
1147+
MD->setRsrc2(CallingConv::AMDGPU_CS,
1148+
CurrentProgramInfo.getComputePGMRSrc2());
1149+
} else {
1150+
EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
1151+
}
11421152

11431153
// Set optional info
11441154
MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3055,6 +3055,11 @@ bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
30553055
return hasAny64BitVGPROperands(OpDesc);
30563056
}
30573057

3058+
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3059+
// Currently this is 128 for all subtargets
3060+
return 128;
3061+
}
3062+
30583063
} // namespace AMDGPU
30593064

30603065
raw_ostream &operator<<(raw_ostream &OS,

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,6 +1513,11 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID);
15131513
/// \returns true if the intrinsic is uniform
15141514
bool isIntrinsicAlwaysUniform(unsigned IntrID);
15151515

1516+
/// \returns lds block size in terms of dwords.
1517+
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
1518+
/// must be defined in terms of bytes.
1519+
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1520+
15161521
} // end namespace AMDGPU
15171522

15181523
raw_ostream &operator<<(raw_ostream &OS,

0 commit comments

Comments
 (0)