-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][Attributor] Infer inreg
attribute in AMDGPUAttributor
#101609
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -14,7 +14,10 @@ | |||||
#include "GCNSubtarget.h" | ||||||
#include "Utils/AMDGPUBaseInfo.h" | ||||||
#include "llvm/Analysis/CycleAnalysis.h" | ||||||
#include "llvm/Analysis/TargetTransformInfo.h" | ||||||
#include "llvm/Analysis/UniformityAnalysis.h" | ||||||
#include "llvm/CodeGen/TargetPassConfig.h" | ||||||
#include "llvm/IR/IRBuilder.h" | ||||||
#include "llvm/IR/IntrinsicsAMDGPU.h" | ||||||
#include "llvm/IR/IntrinsicsR600.h" | ||||||
#include "llvm/InitializePasses.h" | ||||||
|
@@ -1295,6 +1298,134 @@ struct AAAMDGPUNoAGPR | |||||
|
||||||
const char AAAMDGPUNoAGPR::ID = 0; | ||||||
|
||||||
struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||
using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||
AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||
|
||||||
/// Create an abstract attribute view for the position \p IRP. | ||||||
static AAAMDGPUUniform &createForPosition(const IRPosition &IRP, | ||||||
Attributor &A); | ||||||
|
||||||
/// See AbstractAttribute::getName() | ||||||
StringRef getName() const override { return "AAAMDGPUUniform"; } | ||||||
|
||||||
const std::string getAsStr(Attributor *A) const override { | ||||||
return getAssumed() ? "uniform" : "divergent"; | ||||||
} | ||||||
|
||||||
void trackStatistics() const override {} | ||||||
|
||||||
/// See AbstractAttribute::getIdAddr() | ||||||
const char *getIdAddr() const override { return &ID; } | ||||||
|
||||||
/// This function should return true if the type of the \p AA is | ||||||
/// AAAMDGPUUniform | ||||||
static bool classof(const AbstractAttribute *AA) { | ||||||
return (AA->getIdAddr() == &ID); | ||||||
} | ||||||
|
||||||
/// Unique ID (due to the unique address) | ||||||
static const char ID; | ||||||
}; | ||||||
|
||||||
const char AAAMDGPUUniform::ID = 0; | ||||||
|
||||||
/// This AA is to infer the inreg attribute for a function argument. | ||||||
struct AAAMDGPUUniformArgument : public AAAMDGPUUniform { | ||||||
AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A) | ||||||
: AAAMDGPUUniform(IRP, A) {} | ||||||
|
||||||
void initialize(Attributor &A) override { | ||||||
Argument *Arg = getAssociatedArgument(); | ||||||
CallingConv::ID CC = Arg->getParent()->getCallingConv(); | ||||||
if (Arg->hasAttribute(Attribute::InReg)) { | ||||||
indicateOptimisticFixpoint(); | ||||||
return; | ||||||
} | ||||||
if (AMDGPU::isEntryFunctionCC(CC)) { | ||||||
// We only use isArgPassedInSGPR on kernel entry function argument, so | ||||||
// even if we will use VPGR for inreg i1 argument passing, it will not | ||||||
// affect this. | ||||||
if (AMDGPU::isArgPassedInSGPR(Arg)) | ||||||
indicateOptimisticFixpoint(); | ||||||
else | ||||||
indicatePessimisticFixpoint(); | ||||||
} | ||||||
} | ||||||
|
||||||
ChangeStatus updateImpl(Attributor &A) override { | ||||||
unsigned ArgNo = getAssociatedArgument()->getArgNo(); | ||||||
|
||||||
auto isUniform = [&](AbstractCallSite ACS) -> bool { | ||||||
CallBase *CB = ACS.getInstruction(); | ||||||
Value *V = CB->getArgOperandUse(ArgNo); | ||||||
if (isa<Constant>(V)) | ||||||
return true; | ||||||
shiltian marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
Function *F = nullptr; | ||||||
if (auto *Arg = dyn_cast<Argument>(V)) { | ||||||
auto *AA = | ||||||
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg)); | ||||||
if (AA) | ||||||
return AA->isValidState(); | ||||||
F = Arg->getParent(); | ||||||
} else if (auto *I = dyn_cast<Instruction>(V)) { | ||||||
F = I->getFunction(); | ||||||
} | ||||||
|
||||||
if (F) { | ||||||
auto *UA = | ||||||
A.getInfoCache() | ||||||
.getAnalysisResultForFunction<UniformityInfoAnalysis>(*F); | ||||||
return UA && UA->isUniform(V); | ||||||
shiltian marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
|
||||||
return false; | ||||||
}; | ||||||
|
||||||
bool UsedAssumedInformation = true; | ||||||
if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true, | ||||||
UsedAssumedInformation)) | ||||||
return indicatePessimisticFixpoint(); | ||||||
|
||||||
if (!UsedAssumedInformation) | ||||||
return indicateOptimisticFixpoint(); | ||||||
|
||||||
return ChangeStatus::UNCHANGED; | ||||||
} | ||||||
|
||||||
ChangeStatus manifest(Attributor &A) override { | ||||||
Argument *Arg = getAssociatedArgument(); | ||||||
// If the argument already has inreg attribute, we will not do anything | ||||||
// about it. | ||||||
if (Arg->hasAttribute(Attribute::InReg)) | ||||||
return ChangeStatus::UNCHANGED; | ||||||
shiltian marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv())) | ||||||
return ChangeStatus::UNCHANGED; | ||||||
// We don't directly emit readfirstlane here because it will cause multiple | ||||||
// replacements of a single use in the manifest map, which is not supported | ||||||
// at this moment. | ||||||
Comment on lines
+1404
to
+1406
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really understand the problem |
||||||
// Add both inreg and "uniform" attribute to the argument. We will emit a | ||||||
// readfirstlane at each call site for inreg uniform argument, and the | ||||||
// "uniform" attribute will be removed later. | ||||||
LLVMContext &Ctx = Arg->getContext(); | ||||||
return A.manifestAttrs(getIRPosition(), | ||||||
{Attribute::get(Ctx, Attribute::InReg), | ||||||
Attribute::get(Ctx, "uniform")}); | ||||||
Comment on lines
+1407
to
+1413
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Once again, I think it would be easiest to restrict this to the trivially uniform case for now. Extending this to arbitrary uniform analysis and the readfirstlanes should be a second step. The trivially uniform case is the most important case, and every problem is going to be in these other cases |
||||||
} | ||||||
}; | ||||||
|
||||||
AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP, | ||||||
Attributor &A) { | ||||||
switch (IRP.getPositionKind()) { | ||||||
case IRPosition::IRP_ARGUMENT: | ||||||
return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A); | ||||||
// TODO: Since inreg is also allowed for return value, maybe we need to add | ||||||
// AAAMDGPUUniformCallSiteReturned? | ||||||
shiltian marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
default: | ||||||
llvm_unreachable("not a valid position for AAAMDGPUUniform"); | ||||||
} | ||||||
} | ||||||
|
||||||
/// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute | ||||||
/// based on the finalized 'amdgpu-flat-work-group-size' attribute. | ||||||
/// Both attributes start with narrow ranges that expand during iteration. | ||||||
|
@@ -1363,6 +1494,64 @@ static bool updateWavesPerEU(Module &M, TargetMachine &TM) { | |||||
return Changed; | ||||||
} | ||||||
|
||||||
/// Emit the readfirstlane intrinsic for all inreg uniform function arguments at | ||||||
/// each call site. The inreg uniform attribute combination is set by | ||||||
/// AAAMDGPUUniform. This function provides a workaround for a downstream issue | ||||||
/// where failing to emit a waterfall loop for 'inreg' arguments may result in | ||||||
/// an invalid VGPR-to-SGPR copy. However, we intentionally avoid a waterfall | ||||||
/// loop for inreg uniform arguments here, because the 'inreg' attribute set by | ||||||
/// AAAMDGPUUniform guarantees uniformity, making the readfirstlane intrinsic | ||||||
/// appropriate. | ||||||
static bool emitReadFirstLaneForInregUniformArgs(Module &M) { | ||||||
bool Changed = false; | ||||||
std::vector<std::pair<CallBase *, unsigned>> WorkList; | ||||||
|
||||||
for (Function &F : M) { | ||||||
if (F.isDeclaration()) | ||||||
continue; | ||||||
for (Argument &Arg : F.args()) { | ||||||
if (!Arg.hasAttribute(Attribute::InReg) || !Arg.hasAttribute("uniform")) | ||||||
continue; | ||||||
unsigned ArgNo = Arg.getArgNo(); | ||||||
for (Use &U : F.uses()) { | ||||||
auto *CB = dyn_cast<CallBase>(U.getUser()); | ||||||
if (!CB) | ||||||
continue; | ||||||
Value *CSArg = CB->getArgOperand(ArgNo); | ||||||
// We don't need readfirstvalue for a global value. | ||||||
if (isa<GlobalValue>(CSArg)) | ||||||
continue; | ||||||
Comment on lines
+1521
to
+1523
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any constant, or really any isTriviallyUniform There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Not necessarily. Address space cast can diverge in the future (the code has not been upstream yet I think). |
||||||
// We will skip the call site argument when itself is an inreg argument. | ||||||
// In this case, it will already be in SGPR. | ||||||
if (auto *CSArgArg = dyn_cast<Argument>(CSArg)) { | ||||||
if (CSArgArg->hasAttribute(Attribute::InReg)) | ||||||
continue; | ||||||
} | ||||||
WorkList.emplace_back(CB, ArgNo); | ||||||
} | ||||||
Arg.removeAttr("uniform"); | ||||||
Changed = true; | ||||||
} | ||||||
} | ||||||
|
||||||
if (WorkList.empty()) | ||||||
return Changed; | ||||||
|
||||||
for (auto &[CB, ArgNo] : WorkList) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
Value *V = CB->getArgOperand(ArgNo); | ||||||
IRBuilder<> Builder(CB); | ||||||
Value *NewV = Builder.CreateIntrinsic(V->getType(), | ||||||
Intrinsic::amdgcn_readfirstlane, {V}); | ||||||
Comment on lines
+1543
to
+1544
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the function uses convergence tokens it should add the convergencectrl bundle |
||||||
CB->setArgOperand(ArgNo, NewV); | ||||||
Comment on lines
+1543
to
+1545
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you directly do this in the manifest of the attribute? Ideally this would only do this for cases where this pass introduced the inreg argument and leave existing cases alone There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally yes, but it is not feasible at the moment. Long story short, it could cause to register multiple updates/replacement of a single IRP in the manifest map, which is currently not supported. |
||||||
if (auto *I = dyn_cast<Instruction>(V)) { | ||||||
if (I->use_empty()) | ||||||
I->eraseFromParent(); | ||||||
} | ||||||
} | ||||||
|
||||||
return true; | ||||||
} | ||||||
|
||||||
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, | ||||||
AMDGPUAttributorOptions Options, | ||||||
ThinOrFullLTOPhase LTOPhase) { | ||||||
|
@@ -1381,7 +1570,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, | |||||
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, | ||||||
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, | ||||||
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID, | ||||||
&AAInstanceInfo::ID}); | ||||||
&AAInstanceInfo::ID, &AAAMDGPUUniform::ID}); | ||||||
|
||||||
AttributorConfig AC(CGUpdater); | ||||||
AC.IsClosedWorldModule = Options.IsClosedWorld; | ||||||
|
@@ -1434,11 +1623,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, | |||||
IRPosition::value(*CmpX->getPointerOperand())); | ||||||
} | ||||||
} | ||||||
|
||||||
if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { | ||||||
for (auto &Arg : F->args()) | ||||||
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg)); | ||||||
} | ||||||
} | ||||||
|
||||||
bool Changed = A.run() == ChangeStatus::CHANGED; | ||||||
|
||||||
Changed |= updateWavesPerEU(M, TM); | ||||||
Changed |= emitReadFirstLaneForInregUniformArgs(M); | ||||||
|
||||||
return Changed; | ||||||
} | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.