Skip to content

MCExpr-ify SIProgramInfo #88257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
421 changes: 288 additions & 133 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {

void initTargetStreamer(Module &M);

static uint64_t getMCExprValue(const MCExpr *Value, MCContext &Ctx);

public:
explicit AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
Expand Down
30 changes: 23 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
Expand Down Expand Up @@ -462,6 +464,16 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
const Function &F = MF.getFunction();

auto GetMCExprValue = [&MF](const MCExpr *Value) {
int64_t Val;
if (!Value->evaluateAsAbsolute(Val)) {
MCContext &Ctx = MF.getContext();
Ctx.reportError(SMLoc(), "could not resolve expression when required.");
Val = 0;
}
return static_cast<uint64_t>(Val);
};

auto Kern = HSAMetadataDoc->getMapNode();

Align MaxKernArgAlign;
Expand All @@ -470,10 +482,11 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
Kern[".group_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
Kern[".uses_dynamic_stack"] =
Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.ScratchSize));
if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
Kern[".uses_dynamic_stack"] = Kern.getDocument()->getNode(
static_cast<bool>(GetMCExprValue(ProgramInfo.DynamicCallStack)));
}

if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
Kern[".workgroup_processor_mode"] =
Expand All @@ -484,12 +497,15 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
Kern[".wavefront_size"] =
Kern.getDocument()->getNode(STM.getWavefrontSize());
Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
Kern[".sgpr_count"] =
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumSGPR));
Kern[".vgpr_count"] =
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumVGPR));

// Only add AGPR count to metadata for supported devices
if (STM.hasMAIInsts()) {
Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
Kern[".agpr_count"] =
Kern.getDocument()->getNode(GetMCExprValue(ProgramInfo.NumAccVGPR));
}

Kern[".max_flat_workgroup_size"] =
Expand Down
25 changes: 2 additions & 23 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -664,29 +664,8 @@ bool GCNSubtarget::useVGPRIndexMode() const {
bool GCNSubtarget::useAA() const { return UseAA; }

unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return getMaxWavesPerEU();

if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
return 10;
if (SGPRs <= 88)
return 9;
if (SGPRs <= 100)
return 8;
return 7;
}
if (SGPRs <= 48)
return 10;
if (SGPRs <= 56)
return 9;
if (SGPRs <= 64)
return 8;
if (SGPRs <= 72)
return 7;
if (SGPRs <= 80)
return 6;
return 5;
return AMDGPU::IsaInfo::getOccupancyWithNumSGPRs(SGPRs, getMaxWavesPerEU(),
getGeneration());
}

unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned NumVGPRs) const {
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8406,12 +8406,16 @@ bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
AGVK VK = StringSwitch<AGVK>(TokenId)
.Case("max", AGVK::AGVK_Max)
.Case("or", AGVK::AGVK_Or)
.Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
.Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
.Case("alignto", AGVK::AGVK_AlignTo)
.Case("occupancy", AGVK::AGVK_Occupancy)
.Default(AGVK::AGVK_None);

if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
SmallVector<const MCExpr *, 4> Exprs;
uint64_t CommaCount = 0;
lex(); // Eat 'max'/'or'
lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
lex(); // Eat '('
while (true) {
if (trySkipToken(AsmToken::RParen)) {
Expand Down
201 changes: 201 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
//===----------------------------------------------------------------------===//

#include "AMDGPUMCExpr.h"
#include "GCNSubtarget.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This include is also questionable layering-wise. MCTargetDesc is supposed to be a small(ish) library independent of the main Target library, but now it includes GCNSubtarget.h from the main target library, which pulls in AMDGPURegisterBankInfo.h which includes generated headers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is wrong, and a leftover. Anything from the subtarget should be used from Utils/AMDGPUBaseInfo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My constraint was that I had to compute things like occupancy and extrasgpr usage in places where I couldn't assume MCExpr as resolvable and therefore had to delay computation through some of these custom MCExprs. Said computations were normally done through a lot of the GCNSubtarget calls unfortunately. I'll look into whether I can move some of these around to Utils/AMDGPUBaseInfo.

#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
Expand All @@ -16,6 +19,7 @@
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;

AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr(VariadicKind Kind,
ArrayRef<const MCExpr *> Args,
Expand Down Expand Up @@ -61,6 +65,18 @@ void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS,
case AGVK_Max:
OS << "max(";
break;
case AGVK_ExtraSGPRs:
OS << "extrasgprs(";
break;
case AGVK_TotalNumVGPRs:
OS << "totalnumvgprs(";
break;
case AGVK_AlignTo:
OS << "alignto(";
break;
case AGVK_Occupancy:
OS << "occupancy(";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The occupancy isn't a standalone concept, it's derivable from everything else. Why does it need its own expression?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would've liked to avoid the occupancy expressions but the SIProgramInfo struct has a member for occupancy which is derived from (among other things) the NumVGPRs and NumSGPRs which will be MCExprs and could possibly be unresolved at the time SIProgramInfo's occupancy is computed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't that just used for the comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it seems to be used only for comments and remarks. However, there is a check in getSIProgramInfo on this occupancy that may error out of compilation.

break;
}
for (auto It = Args.begin(); It != Args.end(); ++It) {
(*It)->print(OS, MAI, /*InParens=*/false);
Expand All @@ -82,10 +98,151 @@ static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
}
}

bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
MCValue MCVal;
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
!MCVal.isAbsolute())
return false;

ConstantValue = MCVal.getConstant();
return true;
};

assert(Args.size() == 3 &&
"AMDGPUVariadic Argument count incorrect for ExtraSGPRs");
const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;

bool Success = TryGetMCExprValue(Args[2], XNACKUsed);

assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant");
if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) ||
!TryGetMCExprValue(Args[1], FlatScrUsed))
return false;

uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
STI, (bool)VCCUsed, (bool)FlatScrUsed, (bool)XNACKUsed);
Res = MCValue::get(ExtraSGPRs);
return true;
}

bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
MCValue MCVal;
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
!MCVal.isAbsolute())
return false;

ConstantValue = MCVal.getConstant();
return true;
};
assert(Args.size() == 2 &&
"AMDGPUVariadic Argument count incorrect for TotalNumVGPRs");
const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
uint64_t NumAGPR = 0, NumVGPR = 0;

bool Has90AInsts = AMDGPU::isGFX90A(*STI);

if (!TryGetMCExprValue(Args[0], NumAGPR) ||
!TryGetMCExprValue(Args[1], NumVGPR))
return false;

uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(NumVGPR, 4) + NumAGPR
: std::max(NumVGPR, NumAGPR);
Res = MCValue::get(TotalNum);
return true;
}

bool AMDGPUVariadicMCExpr::evaluateAlignTo(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
MCValue MCVal;
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
!MCVal.isAbsolute())
return false;

ConstantValue = MCVal.getConstant();
return true;
};

assert(Args.size() == 2 &&
"AMDGPUVariadic Argument count incorrect for AlignTo");
uint64_t Value = 0, Align = 0;
if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align))
return false;

Res = MCValue::get(alignTo(Value, Align));
return true;
}

bool AMDGPUVariadicMCExpr::evaluateOccupancy(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
MCValue MCVal;
if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
!MCVal.isAbsolute())
return false;

ConstantValue = MCVal.getConstant();
return true;
};
assert(Args.size() == 7 &&
"AMDGPUVariadic Argument count incorrect for Occupancy");
uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
NumSGPRs, NumVGPRs;

bool Success = true;
Success &= TryGetMCExprValue(Args[0], MaxWaves);
Success &= TryGetMCExprValue(Args[1], Granule);
Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
Success &= TryGetMCExprValue(Args[3], Generation);
Success &= TryGetMCExprValue(Args[4], InitOccupancy);

assert(Success && "Arguments 1 to 5 for Occupancy should be known constants");

if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
!TryGetMCExprValue(Args[6], NumVGPRs))
return false;

unsigned Occupancy = InitOccupancy;
if (NumSGPRs)
Occupancy = std::min(
Occupancy, IsaInfo::getOccupancyWithNumSGPRs(
NumSGPRs, MaxWaves,
static_cast<AMDGPUSubtarget::Generation>(Generation)));
if (NumVGPRs)
Occupancy = std::min(Occupancy,
IsaInfo::getNumWavesPerEUWithNumVGPRs(
NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));

Res = MCValue::get(Occupancy);
return true;
}

bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl(
MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
std::optional<int64_t> Total;

switch (Kind) {
default:
break;
case AGVK_ExtraSGPRs:
return evaluateExtraSGPRs(Res, Layout, Fixup);
case AGVK_AlignTo:
return evaluateAlignTo(Res, Layout, Fixup);
case AGVK_TotalNumVGPRs:
return evaluateTotalNumVGPR(Res, Layout, Fixup);
case AGVK_Occupancy:
return evaluateOccupancy(Res, Layout, Fixup);
}

for (const MCExpr *Arg : Args) {
MCValue ArgRes;
if (!Arg->evaluateAsRelocatable(ArgRes, Layout, Fixup) ||
Expand Down Expand Up @@ -113,3 +270,47 @@ MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const {
}
return nullptr;
}

/// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
/// are unresolvable but needed for further MCExprs). Derived from
/// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
///
const AMDGPUVariadicMCExpr *
AMDGPUVariadicMCExpr::createExtraSGPRs(const MCExpr *VCCUsed,
const MCExpr *FlatScrUsed,
bool XNACKUsed, MCContext &Ctx) {

return create(AGVK_ExtraSGPRs,
{VCCUsed, FlatScrUsed, MCConstantExpr::create(XNACKUsed, Ctx)},
Ctx);
}

const AMDGPUVariadicMCExpr *AMDGPUVariadicMCExpr::createTotalNumVGPR(
const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx) {
return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
}

/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
///
/// Remove dependency on GCNSubtarget and depend only only the necessary values
/// for said occupancy computation. Should match computeOccupancy implementation
/// without passing \p STM on.
const AMDGPUVariadicMCExpr *
AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
const MCExpr *NumVGPRs,
const GCNSubtarget &STM, MCContext &Ctx) {
unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM);
unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
unsigned Generation = STM.getGeneration();

auto CreateExpr = [&Ctx](unsigned Value) {
return MCConstantExpr::create(Value, Ctx);
};

return create(AGVK_Occupancy,
{CreateExpr(MaxWaves), CreateExpr(Granule),
CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation),
CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
Ctx);
}
Loading