Skip to content

Commit ec57b37

Browse files
committed
AMDGPU: Use attributor to propagate amdgpu-flat-work-group-size
This can merge the acceptable ranges based on the call graph, rather than the simple application of the attribute. Remove the handling from the old pass.
1 parent 8d4b74a commit ec57b37

File tree

4 files changed

+343
-54
lines changed

4 files changed

+343
-54
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,17 @@ class AMDGPUInformationCache : public InformationCache {
128128
return ST.hasApertureRegs();
129129
}
130130

131+
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
132+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
133+
return ST.getFlatWorkGroupSizes(F);
134+
}
135+
136+
std::pair<unsigned, unsigned>
137+
getMaximumFlatWorkGroupRange(const Function &F) {
138+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
139+
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
140+
}
141+
131142
private:
132143
/// Check if the ConstantExpr \p CE requires queue ptr attribute.
133144
static bool visitConstExpr(const ConstantExpr *CE) {
@@ -470,6 +481,118 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
470481
llvm_unreachable("AAAMDAttributes is only valid for function position");
471482
}
472483

484+
/// Propagate amdgpu-flat-work-group-size attribute.
485+
struct AAAMDFlatWorkGroupSize
486+
: public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
487+
using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
488+
AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
489+
: Base(IRP, 32) {}
490+
491+
/// See AbstractAttribute::getState(...).
492+
IntegerRangeState &getState() override { return *this; }
493+
const IntegerRangeState &getState() const override { return *this; }
494+
495+
void initialize(Attributor &A) override {
496+
Function *F = getAssociatedFunction();
497+
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
498+
unsigned MinGroupSize, MaxGroupSize;
499+
std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
500+
intersectKnown(
501+
ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
502+
}
503+
504+
ChangeStatus updateImpl(Attributor &A) override {
505+
ChangeStatus Change = ChangeStatus::UNCHANGED;
506+
507+
auto CheckCallSite = [&](AbstractCallSite CS) {
508+
Function *Caller = CS.getInstruction()->getFunction();
509+
LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
510+
<< "->" << getAssociatedFunction()->getName() << '\n');
511+
512+
const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
513+
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
514+
515+
Change |=
516+
clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
517+
518+
return true;
519+
};
520+
521+
bool AllCallSitesKnown = true;
522+
if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
523+
return indicatePessimisticFixpoint();
524+
525+
return Change;
526+
}
527+
528+
ChangeStatus manifest(Attributor &A) override {
529+
SmallVector<Attribute, 8> AttrList;
530+
Function *F = getAssociatedFunction();
531+
LLVMContext &Ctx = F->getContext();
532+
533+
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
534+
unsigned Min, Max;
535+
std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
536+
537+
// Don't add the attribute if it's the implied default.
538+
if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
539+
return ChangeStatus::UNCHANGED;
540+
541+
SmallString<10> Buffer;
542+
raw_svector_ostream OS(Buffer);
543+
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
544+
545+
AttrList.push_back(
546+
Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
547+
return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
548+
/* ForceReplace */ true);
549+
}
550+
551+
const std::string getAsStr() const override {
552+
std::string Str;
553+
raw_string_ostream OS(Str);
554+
OS << "AMDFlatWorkGroupSize[";
555+
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
556+
OS << ']';
557+
return OS.str();
558+
}
559+
560+
/// See AbstractAttribute::trackStatistics()
561+
void trackStatistics() const override {}
562+
563+
/// Create an abstract attribute view for the position \p IRP.
564+
static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
565+
Attributor &A);
566+
567+
/// See AbstractAttribute::getName()
568+
const std::string getName() const override {
569+
return "AAAMDFlatWorkGroupSize";
570+
}
571+
572+
/// See AbstractAttribute::getIdAddr()
573+
const char *getIdAddr() const override { return &ID; }
574+
575+
/// This function should return true if the type of the \p AA is
576+
/// AAAMDFlatWorkGroupSize
577+
static bool classof(const AbstractAttribute *AA) {
578+
return (AA->getIdAddr() == &ID);
579+
}
580+
581+
/// Unique ID (due to the unique address)
582+
static const char ID;
583+
};
584+
585+
// Storage for the class ID. Only its address is used: getIdAddr() returns
// &ID and classof() compares against it, so the value itself is irrelevant.
const char AAAMDFlatWorkGroupSize::ID = 0;
586+
587+
/// Allocate a new AAAMDFlatWorkGroupSize for \p IRP from the allocator of
/// \p A. Only function positions are supported; any other position kind is
/// treated as unreachable.
AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  const bool IsFunctionPosition =
      IRP.getPositionKind() == IRPosition::IRP_FUNCTION;
  if (!IsFunctionPosition)
    llvm_unreachable(
        "AAAMDFlatWorkGroupSize is only valid for function position");

  return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
}
595+
473596
class AMDGPUAttributor : public ModulePass {
474597
public:
475598
AMDGPUAttributor() : ModulePass(ID) {}
@@ -497,14 +620,18 @@ class AMDGPUAttributor : public ModulePass {
497620
BumpPtrAllocator Allocator;
498621
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
499622
DenseSet<const char *> Allowed(
500-
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AACallEdges::ID});
623+
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
624+
&AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
501625

502626
Attributor A(Functions, InfoCache, CGUpdater, &Allowed);
503627

504628
for (Function &F : M) {
505629
if (!F.isIntrinsic()) {
506630
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
507631
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
632+
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
633+
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
634+
}
508635
}
509636
}
510637

llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,7 @@ static constexpr const FeatureBitset TargetFeatures = {
5555

5656
// Attributes to propagate.
5757
// TODO: Support conservative min/max merging instead of cloning.
58-
static constexpr const char* AttributeNames[] = {
59-
"amdgpu-waves-per-eu",
60-
"amdgpu-flat-work-group-size"
61-
};
58+
// Names of the string attributes handled by this pass; NumAttr is derived
// from the size of this array. "amdgpu-flat-work-group-size" is intentionally
// not listed: per the accompanying change description it is propagated by
// the AMDGPU attributor instead.
static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
6259

6360
static constexpr unsigned NumAttr =
6461
sizeof(AttributeNames) / sizeof(AttributeNames[0]);

llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll

Lines changed: 0 additions & 49 deletions
This file was deleted.

0 commit comments

Comments
 (0)