-
Notifications
You must be signed in to change notification settings - Fork 14.2k
MCExpr-ify SIProgramInfo #88257
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MCExpr-ify SIProgramInfo #88257
Changes from all commits
5da69c7
ca6f3a0
aa64a63
d8ec418
db82c77
38fb26b
7a45953
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,9 @@ | |
//===----------------------------------------------------------------------===// | ||
|
||
#include "AMDGPUMCExpr.h" | ||
#include "GCNSubtarget.h" | ||
#include "Utils/AMDGPUBaseInfo.h" | ||
#include "llvm/IR/Function.h" | ||
#include "llvm/MC/MCContext.h" | ||
#include "llvm/MC/MCStreamer.h" | ||
#include "llvm/MC/MCSymbol.h" | ||
|
@@ -16,6 +19,7 @@ | |
#include <optional> | ||
|
||
using namespace llvm; | ||
using namespace llvm::AMDGPU; | ||
|
||
AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr(VariadicKind Kind, | ||
ArrayRef<const MCExpr *> Args, | ||
|
@@ -61,6 +65,18 @@ void AMDGPUVariadicMCExpr::printImpl(raw_ostream &OS, | |
case AGVK_Max: | ||
OS << "max("; | ||
break; | ||
case AGVK_ExtraSGPRs: | ||
OS << "extrasgprs("; | ||
break; | ||
case AGVK_TotalNumVGPRs: | ||
OS << "totalnumvgprs("; | ||
break; | ||
case AGVK_AlignTo: | ||
OS << "alignto("; | ||
break; | ||
case AGVK_Occupancy: | ||
OS << "occupancy("; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The occupancy isn't a standalone concept, it's derivable from everything else. Why does it need its own expression? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would've liked to avoid the occupancy expressions but the SIProgramInfo struct has a member for occupancy which is derived from (among other things) the NumVGPRs and NumSGPRs which will be MCExprs and could possibly be unresolved at the time SIProgramInfo's occupancy is computed. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't that just used for the comment? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it seems to be used only for comments and remarks. However, there is a check in |
||
break; | ||
} | ||
for (auto It = Args.begin(); It != Args.end(); ++It) { | ||
(*It)->print(OS, MAI, /*InParens=*/false); | ||
|
@@ -82,10 +98,151 @@ static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1, | |
} | ||
} | ||
|
||
/// Evaluates an AGVK_ExtraSGPRs expression to a constant.
///
/// The three operands are, in order, VCCUsed, FlatScrUsed and XNACKUsed. Each
/// one is folded to an absolute value, treated as a boolean flag and handed to
/// IsaInfo::getNumExtraSGPRs together with the context's subtarget info.
///
/// \param Res receives the computed number of extra SGPRs on success.
/// \param Layout forwarded to the operands' evaluateAsRelocatable.
/// \param Fixup forwarded to the operands' evaluateAsRelocatable.
/// \returns true when every operand folds to an absolute constant, false
/// otherwise (in which case \p Res is left untouched).
bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs(MCValue &Res,
                                              const MCAsmLayout *Layout,
                                              const MCFixup *Fixup) const {
  // Folds Operand to an absolute constant; Out is written only on success.
  auto FoldToConstant = [&](const MCExpr *Operand, uint64_t &Out) {
    MCValue Folded;
    const bool IsAbsolute =
        Operand->evaluateAsRelocatable(Folded, Layout, Fixup) &&
        Folded.isAbsolute();
    if (IsAbsolute)
      Out = Folded.getConstant();
    return IsAbsolute;
  };

  assert(Args.size() == 3 &&
         "AMDGPUVariadic Argument count incorrect for ExtraSGPRs");
  const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();

  // The XNACK operand is checked first: it is expected to always be a known
  // constant, so failing to fold it is treated as a programming error.
  uint64_t XNACKFlag = 0;
  const bool Success = FoldToConstant(Args[2], XNACKFlag);
  assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant");
  if (!Success)
    return false;

  // The remaining operands may legitimately still be unresolved.
  uint64_t VCCFlag = 0;
  if (!FoldToConstant(Args[0], VCCFlag))
    return false;

  uint64_t FlatScrFlag = 0;
  if (!FoldToConstant(Args[1], FlatScrFlag))
    return false;

  const uint64_t ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
      STI, VCCFlag != 0, FlatScrFlag != 0, XNACKFlag != 0);
  Res = MCValue::get(ExtraSGPRs);
  return true;
}
|
||
/// Evaluates an AGVK_TotalNumVGPRs expression to a constant.
///
/// The two operands are, in order, the AGPR count and the VGPR count. On a
/// GFX90A subtarget with a non-zero AGPR count the result is the VGPR count
/// aligned up to a multiple of 4 plus the AGPR count; in every other case it
/// is the larger of the two counts.
///
/// \param Res receives the combined register count on success.
/// \param Layout forwarded to the operands' evaluateAsRelocatable.
/// \param Fixup forwarded to the operands' evaluateAsRelocatable.
/// \returns true when both operands fold to absolute constants, false
/// otherwise (in which case \p Res is left untouched).
bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR(MCValue &Res,
                                                const MCAsmLayout *Layout,
                                                const MCFixup *Fixup) const {
  // Folds Operand to an absolute constant; Out is written only on success.
  auto FoldToConstant = [&](const MCExpr *Operand, uint64_t &Out) {
    MCValue Folded;
    const bool IsAbsolute =
        Operand->evaluateAsRelocatable(Folded, Layout, Fixup) &&
        Folded.isAbsolute();
    if (IsAbsolute)
      Out = Folded.getConstant();
    return IsAbsolute;
  };

  assert(Args.size() == 2 &&
         "AMDGPUVariadic Argument count incorrect for TotalNumVGPRs");
  const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
  const bool Has90AInsts = AMDGPU::isGFX90A(*STI);

  uint64_t NumAGPR = 0;
  if (!FoldToConstant(Args[0], NumAGPR))
    return false;

  uint64_t NumVGPR = 0;
  if (!FoldToConstant(Args[1], NumVGPR))
    return false;

  if (Has90AInsts && NumAGPR != 0) {
    // VGPR count is rounded up to a multiple of 4 before the AGPRs are added.
    Res = MCValue::get(alignTo(NumVGPR, 4) + NumAGPR);
    return true;
  }

  Res = MCValue::get(std::max(NumVGPR, NumAGPR));
  return true;
}
|
||
/// Evaluates an AGVK_AlignTo expression to a constant.
///
/// The two operands are, in order, the value to align and the alignment. The
/// result is the value rounded up to the next multiple of the alignment, as
/// computed by alignTo.
///
/// \param Res receives the aligned value on success.
/// \param Layout forwarded to the operands' evaluateAsRelocatable.
/// \param Fixup forwarded to the operands' evaluateAsRelocatable.
/// \returns true when both operands fold to absolute constants and the
/// alignment is non-zero; false otherwise (in which case \p Res is left
/// untouched).
bool AMDGPUVariadicMCExpr::evaluateAlignTo(MCValue &Res,
                                           const MCAsmLayout *Layout,
                                           const MCFixup *Fixup) const {
  // Folds Arg to an absolute constant; ConstantValue is written only on
  // success.
  auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
    MCValue MCVal;
    if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
        !MCVal.isAbsolute())
      return false;

    ConstantValue = MCVal.getConstant();
    return true;
  };

  assert(Args.size() == 2 &&
         "AMDGPUVariadic Argument count incorrect for AlignTo");
  uint64_t Value = 0, Align = 0;
  if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align))
    return false;

  // A zero alignment has no meaningful result and must not reach alignTo,
  // which requires a non-zero alignment (it divides by it). Report the
  // expression as not evaluable instead of crashing.
  if (Align == 0)
    return false;

  Res = MCValue::get(alignTo(Value, Align));
  return true;
}
|
||
/// Evaluates an AGVK_Occupancy expression to a constant.
///
/// The seven operands are, in order: MaxWaves, Granule, TargetTotalNumVGPRs,
/// Generation, InitOccupancy, NumSGPRs and NumVGPRs. The first five are
/// expected to always be known constants; the last two may still be
/// unresolved. The result starts at InitOccupancy and is lowered to the
/// SGPR-limited and VGPR-limited occupancies whenever the respective register
/// count is non-zero.
///
/// \param Res receives the computed occupancy on success.
/// \param Layout forwarded to the operands' evaluateAsRelocatable.
/// \param Fixup forwarded to the operands' evaluateAsRelocatable.
/// \returns true when every operand folds to an absolute constant, false
/// otherwise (in which case \p Res is left untouched).
bool AMDGPUVariadicMCExpr::evaluateOccupancy(MCValue &Res,
                                             const MCAsmLayout *Layout,
                                             const MCFixup *Fixup) const {
  // Folds Arg to an absolute constant; ConstantValue is written only on
  // success.
  auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
    MCValue MCVal;
    if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
        !MCVal.isAbsolute())
      return false;

    ConstantValue = MCVal.getConstant();
    return true;
  };
  assert(Args.size() == 7 &&
         "AMDGPUVariadic Argument count incorrect for Occupancy");

  // Zero-initialized so that no local is ever left indeterminate when folding
  // fails, matching the other evaluate* helpers.
  uint64_t MaxWaves = 0;
  uint64_t Granule = 0;
  uint64_t TargetTotalNumVGPRs = 0;
  uint64_t Generation = 0;
  uint64_t InitOccupancy = 0;
  uint64_t NumSGPRs = 0;
  uint64_t NumVGPRs = 0;

  // All five target-derived operands are evaluated unconditionally.
  bool Success = true;
  Success &= TryGetMCExprValue(Args[0], MaxWaves);
  Success &= TryGetMCExprValue(Args[1], Granule);
  Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
  Success &= TryGetMCExprValue(Args[3], Generation);
  Success &= TryGetMCExprValue(Args[4], InitOccupancy);

  assert(Success && "Arguments 1 to 5 for Occupancy should be known constants");

  // The register counts may legitimately still be unresolved.
  if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
      !TryGetMCExprValue(Args[6], NumVGPRs))
    return false;

  unsigned Occupancy = static_cast<unsigned>(InitOccupancy);
  // A register count of zero imposes no limit on the occupancy.
  if (NumSGPRs)
    Occupancy = std::min(
        Occupancy, IsaInfo::getOccupancyWithNumSGPRs(
                       NumSGPRs, MaxWaves,
                       static_cast<AMDGPUSubtarget::Generation>(Generation)));
  if (NumVGPRs)
    Occupancy = std::min(Occupancy,
                         IsaInfo::getNumWavesPerEUWithNumVGPRs(
                             NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));

  Res = MCValue::get(Occupancy);
  return true;
}
|
||
bool AMDGPUVariadicMCExpr::evaluateAsRelocatableImpl( | ||
MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { | ||
std::optional<int64_t> Total; | ||
|
||
switch (Kind) { | ||
default: | ||
break; | ||
case AGVK_ExtraSGPRs: | ||
return evaluateExtraSGPRs(Res, Layout, Fixup); | ||
case AGVK_AlignTo: | ||
return evaluateAlignTo(Res, Layout, Fixup); | ||
case AGVK_TotalNumVGPRs: | ||
return evaluateTotalNumVGPR(Res, Layout, Fixup); | ||
case AGVK_Occupancy: | ||
return evaluateOccupancy(Res, Layout, Fixup); | ||
} | ||
|
||
for (const MCExpr *Arg : Args) { | ||
MCValue ArgRes; | ||
if (!Arg->evaluateAsRelocatable(ArgRes, Layout, Fixup) || | ||
|
@@ -113,3 +270,47 @@ MCFragment *AMDGPUVariadicMCExpr::findAssociatedFragment() const { | |
} | ||
return nullptr; | ||
} | ||
|
||
/// Builds a deferred AGVK_ExtraSGPRs expression.
///
/// This allows the extra-SGPR count to be resolved late, for the case where
/// \p VCCUsed or \p FlatScrUsed cannot be resolved yet but the count is needed
/// by further MCExprs. Derived from the implementation of
/// IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
///
/// The operands are stored in the order (VCCUsed, FlatScrUsed, XNACKUsed);
/// \p XNACKUsed is wrapped in a constant expression.
const AMDGPUVariadicMCExpr *
AMDGPUVariadicMCExpr::createExtraSGPRs(const MCExpr *VCCUsed,
                                       const MCExpr *FlatScrUsed,
                                       bool XNACKUsed, MCContext &Ctx) {
  const MCExpr *XNACKUsedExpr = MCConstantExpr::create(XNACKUsed, Ctx);
  return create(AGVK_ExtraSGPRs, {VCCUsed, FlatScrUsed, XNACKUsedExpr}, Ctx);
}
|
||
/// Builds a deferred AGVK_TotalNumVGPRs expression over the AGPR and VGPR
/// counts. The operands are stored in the order (NumAGPR, NumVGPR).
const AMDGPUVariadicMCExpr *
AMDGPUVariadicMCExpr::createTotalNumVGPR(const MCExpr *NumAGPR,
                                         const MCExpr *NumVGPR,
                                         MCContext &Ctx) {
  const AMDGPUVariadicMCExpr *TotalExpr =
      create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
  return TotalExpr;
}
|
||
/// Builds a deferred AGVK_Occupancy expression, mimicking
/// GCNSubtarget::computeOccupancy for MCExpr.
///
/// The subtarget is queried here once, and only the values necessary for the
/// occupancy computation are captured as constant operands, so that later
/// evaluation does not need \p STM passed on. Should match the
/// computeOccupancy implementation.
///
/// The operands are stored in the order (MaxWaves, Granule,
/// TargetTotalNumVGPRs, Generation, InitOcc, NumSGPRs, NumVGPRs).
const AMDGPUVariadicMCExpr *
AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
                                      const MCExpr *NumVGPRs,
                                      const GCNSubtarget &STM, MCContext &Ctx) {
  // Snapshot the subtarget-dependent inputs.
  const unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
  const unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM);
  const unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
  const unsigned Generation = STM.getGeneration();

  // Wrap each snapshot in a constant expression, in operand order.
  const MCExpr *MaxWavesExpr = MCConstantExpr::create(MaxWaves, Ctx);
  const MCExpr *GranuleExpr = MCConstantExpr::create(Granule, Ctx);
  const MCExpr *TotalNumVGPRsExpr =
      MCConstantExpr::create(TargetTotalNumVGPRs, Ctx);
  const MCExpr *GenerationExpr = MCConstantExpr::create(Generation, Ctx);
  const MCExpr *InitOccExpr = MCConstantExpr::create(InitOcc, Ctx);

  return create(AGVK_Occupancy,
                {MaxWavesExpr, GranuleExpr, TotalNumVGPRsExpr, GenerationExpr,
                 InitOccExpr, NumSGPRs, NumVGPRs},
                Ctx);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This include is also questionable layering-wise. MCTargetDesc is supposed to be a small(ish) library independent of the main Target library, but now it includes GCNSubtarget.h from the main target library, which pulls in AMDGPURegisterBankInfo.h which includes generated headers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is wrong, and a leftover. Anything from the subtarget should be used from Utils/AMDGPUBaseInfo
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My constraint was that I had to compute things like occupancy and extrasgpr usage in places where I couldn't assume MCExpr as resolvable and therefore had to delay computation through some of these custom MCExprs. Said computations were normally done through a lot of the GCNSubtarget calls unfortunately. I'll look into whether I can move some of these around to Utils/AMDGPUBaseInfo.