Skip to content

Commit f365653

Browse files
committed
MCExpr-ify AMDGPU PALMetadata
1 parent 89245b6 commit f365653

File tree

13 files changed

+543
-20
lines changed

13 files changed

+543
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 51 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,30 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
11941194
getLdsDwGranularity(ST) * sizeof(uint32_t)));
11951195
}
11961196

1197+
/// Derive the shift amount and right-justified field mask from a complement
/// mask.
///
/// \param Value a mask whose *clear* bits mark the field (the caller in this
///        file passes a C_* register constant).
/// \returns {Shift, Mask}: Shift is the index of the lowest clear bit of
///          \p Value, and Mask is ~Value shifted right by Shift. Returns
///          {0, 0} when \p Value has no clear bits, i.e. there is no field.
static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
  unsigned Mask = ~Value;

  // With no clear bits in Value the scan below would never find a set bit and
  // would spin forever, so report an empty field instead.
  if (Mask == 0)
    return {0u, 0u};

  // Drop trailing zero bits until the field is right-justified.
  unsigned Shift = 0;
  while ((Mask & 1u) == 0) {
    Mask >>= 1;
    ++Shift;
  }

  return {Shift, Mask};
}
1207+
1208+
/// Wrap \p Val in the expression `(Val & Mask) << Shift`.
///
/// A zero \p Mask omits the AND node and a zero \p Shift omits the SHL node,
/// so with both zero \p Val is returned untouched.
///
/// \param Val   expression to place into a field.
/// \param Mask  right-justified field mask applied before shifting.
/// \param Shift number of bits to shift the masked value left.
/// \param Ctx   context used to create the new expression nodes.
static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
                                  uint32_t Shift, MCContext &Ctx) {
  const MCExpr *Result = Val;

  if (Mask != 0)
    Result = MCBinaryExpr::createAnd(Result, MCConstantExpr::create(Mask, Ctx),
                                     Ctx);

  if (Shift != 0)
    Result = MCBinaryExpr::createShl(Result,
                                     MCConstantExpr::create(Shift, Ctx), Ctx);

  return Result;
}
1220+
11971221
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
11981222
// is AMDPAL. It stores each compute/SPI register setting and other PAL
11991223
// metadata items into the PALMD::Metadata, combining with any provided by the
@@ -1207,41 +1231,49 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
12071231
auto &Ctx = MF.getContext();
12081232

12091233
MD->setEntryPoint(CC, MF.getFunction().getName());
1210-
MD->setNumUsedVgprs(
1211-
CC, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
1234+
MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
12121235

12131236
// Only set AGPRs for supported devices
12141237
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
12151238
if (STM.hasMAIInsts()) {
1216-
MD->setNumUsedAgprs(CC, getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx));
1239+
MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
12171240
}
12181241

1219-
MD->setNumUsedSgprs(
1220-
CC, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
1242+
MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
12211243
if (MD->getPALMajorVersion() < 3) {
1222-
MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM));
1244+
MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
12231245
if (AMDGPU::isCompute(CC)) {
1224-
MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2());
1246+
MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
12251247
} else {
1226-
if (getMCExprValue(CurrentProgramInfo.ScratchBlocks, Ctx) > 0)
1227-
MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
1248+
const MCExpr *HasScratchBlocks =
1249+
MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,
1250+
MCConstantExpr::create(0, Ctx), Ctx);
1251+
auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);
1252+
MD->setRsrc2(CC, MaskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
12281253
}
12291254
} else {
12301255
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
1231-
MD->setHwStage(CC, ".scratch_en",
1232-
(bool)getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx));
1256+
MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
1257+
CurrentProgramInfo.ScratchEnable);
12331258
EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
12341259
}
12351260

12361261
// ScratchSize is in bytes, 16 aligned.
12371262
MD->setScratchSize(
1238-
CC, alignTo(getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), 16));
1263+
CC,
1264+
AMDGPUVariadicMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize,
1265+
MCConstantExpr::create(16, Ctx), Ctx),
1266+
Ctx);
1267+
12391268
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
12401269
unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
12411270
? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
12421271
: CurrentProgramInfo.LDSBlocks;
12431272
if (MD->getPALMajorVersion() < 3) {
1244-
MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1273+
MD->setRsrc2(
1274+
CC,
1275+
MCConstantExpr::create(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize), Ctx),
1276+
Ctx);
12451277
MD->setSpiPsInputEna(MFI->getPSInputEnable());
12461278
MD->setSpiPsInputAddr(MFI->getPSInputAddr());
12471279
} else {
@@ -1288,20 +1320,19 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
12881320

12891321
if (MD->getPALMajorVersion() < 3) {
12901322
// Set compute registers
1291-
MD->setRsrc1(CallingConv::AMDGPU_CS,
1292-
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
1323+
MD->setRsrc1(
1324+
CallingConv::AMDGPU_CS,
1325+
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx);
12931326
MD->setRsrc2(CallingConv::AMDGPU_CS,
1294-
CurrentProgramInfo.getComputePGMRSrc2());
1327+
CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
12951328
} else {
12961329
EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
12971330
}
12981331

12991332
// Set optional info
13001333
MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
1301-
MD->setFunctionNumUsedVgprs(
1302-
FnName, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
1303-
MD->setFunctionNumUsedSgprs(
1304-
FnName, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
1334+
MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1335+
MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
13051336
}
13061337

13071338
// This is supposed to be log2(Size)
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//===- AMDGPUDelayedMCExpr.cpp - Delayed MCExpr resolve ---------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "AMDGPUDelayedMCExpr.h"
10+
#include "llvm/MC/MCExpr.h"
11+
#include "llvm/MC/MCValue.h"
12+
13+
using namespace llvm;
14+
15+
/// Create a node in the document that owns \p DN, holding the constant part of
/// \p Val converted to the requested msgpack \p Type.
///
/// Int, UInt and Boolean are supported; any other type yields the document's
/// empty node.
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type,
                                MCValue Val) {
  msgpack::Document *Owner = DN.getDocument();

  if (Type == msgpack::Type::Int)
    return Owner->getNode(static_cast<int64_t>(Val.getConstant()));

  if (Type == msgpack::Type::UInt)
    return Owner->getNode(static_cast<uint64_t>(Val.getConstant()));

  if (Type == msgpack::Type::Boolean)
    return Owner->getNode(static_cast<bool>(Val.getConstant()));

  // Unsupported node type requested.
  return Owner->getEmptyNode();
}
29+
30+
/// Store the value of \p Expr into \p DN as a node of the given \p Type.
///
/// If \p Expr already evaluates to an absolute value the node is written
/// immediately; otherwise the (node, type, expression) triple is queued so it
/// can be resolved later by ResolveDelayedExpressions().
void DelayedMCExpr::AssignDocNode(msgpack::DocNode &DN, msgpack::Type Type,
                                  const MCExpr *Expr) {
  MCValue Evaluated;
  const bool IsResolved =
      Expr->evaluateAsRelocatable(Evaluated, nullptr, nullptr) &&
      Evaluated.isAbsolute();

  if (!IsResolved) {
    // Not yet known; remember it for a later resolution pass.
    DelayedExprs.emplace_back(DN, Type, Expr);
    return;
  }

  DN = getNode(DN, Type, Evaluated);
}
42+
43+
bool DelayedMCExpr::ResolveDelayedExpressions() {
44+
bool Success;
45+
46+
while (!DelayedExprs.empty()) {
47+
DelayedExpr DE = DelayedExprs.front();
48+
MCValue Res;
49+
50+
Success = DE.Expr->evaluateAsRelocatable(Res, nullptr, nullptr);
51+
Success &= Res.isAbsolute();
52+
if (!Success)
53+
return false;
54+
55+
DelayedExprs.pop_front();
56+
DE.DN = getNode(DE.DN, DE.Type, Res);
57+
}
58+
59+
return true;
60+
}
61+
62+
/// Discard every queued expression without resolving it.
void DelayedMCExpr::clear() {
  DelayedExprs.clear();
}
63+
64+
/// \returns true when no expressions are waiting to be resolved.
bool DelayedMCExpr::empty() {
  return DelayedExprs.empty();
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//===- AMDGPUDelayedMCExpr.h - Delayed MCExpr resolve -----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
10+
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
11+
12+
#include "llvm/BinaryFormat/MsgPackDocument.h"
13+
#include <deque>
14+
15+
namespace llvm {
16+
class MCExpr;
17+
18+
class DelayedMCExpr {
19+
struct DelayedExpr {
20+
msgpack::DocNode &DN;
21+
msgpack::Type Type;
22+
const MCExpr *Expr;
23+
DelayedExpr(msgpack::DocNode &DN, msgpack::Type Type, const MCExpr *Expr)
24+
: DN(DN), Type(Type), Expr(Expr) {}
25+
};
26+
27+
std::deque<DelayedExpr> DelayedExprs;
28+
29+
public:
30+
bool ResolveDelayedExpressions();
31+
void AssignDocNode(msgpack::DocNode &DN, msgpack::Type Type,
32+
const MCExpr *Expr);
33+
void clear();
34+
bool empty();
35+
};
36+
37+
} // end namespace llvm
38+
39+
#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H

0 commit comments

Comments
 (0)