@@ -128,6 +128,17 @@ class AMDGPUInformationCache : public InformationCache {
128
128
return ST.hasApertureRegs ();
129
129
}
130
130
131
+ std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) {
132
+ const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
133
+ return ST.getFlatWorkGroupSizes (F);
134
+ }
135
+
136
+ std::pair<unsigned , unsigned >
137
+ getMaximumFlatWorkGroupRange (const Function &F) {
138
+ const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
139
+ return {ST.getMinFlatWorkGroupSize (), ST.getMaxFlatWorkGroupSize ()};
140
+ }
141
+
131
142
private:
132
143
// / Check if the ConstantExpr \p CE requires queue ptr attribute.
133
144
static bool visitConstExpr (const ConstantExpr *CE) {
@@ -470,6 +481,118 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
470
481
llvm_unreachable (" AAAMDAttributes is only valid for function position" );
471
482
}
472
483
484
+ // / Propagate amdgpu-flat-work-group-size attribute.
485
+ struct AAAMDFlatWorkGroupSize
486
+ : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t > {
487
+ using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t >;
488
+ AAAMDFlatWorkGroupSize (const IRPosition &IRP, Attributor &A)
489
+ : Base(IRP, 32 ) {}
490
+
491
+ // / See AbstractAttribute::getState(...).
492
+ IntegerRangeState &getState () override { return *this ; }
493
+ const IntegerRangeState &getState () const override { return *this ; }
494
+
495
+ void initialize (Attributor &A) override {
496
+ Function *F = getAssociatedFunction ();
497
+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
498
+ unsigned MinGroupSize, MaxGroupSize;
499
+ std::tie (MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes (*F);
500
+ intersectKnown (
501
+ ConstantRange (APInt (32 , MinGroupSize), APInt (32 , MaxGroupSize + 1 )));
502
+ }
503
+
504
+ ChangeStatus updateImpl (Attributor &A) override {
505
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
506
+
507
+ auto CheckCallSite = [&](AbstractCallSite CS) {
508
+ Function *Caller = CS.getInstruction ()->getFunction ();
509
+ LLVM_DEBUG (dbgs () << " [AAAMDFlatWorkGroupSize] Call " << Caller->getName ()
510
+ << " ->" << getAssociatedFunction ()->getName () << ' \n ' );
511
+
512
+ const auto &CallerInfo = A.getAAFor <AAAMDFlatWorkGroupSize>(
513
+ *this , IRPosition::function (*Caller), DepClassTy::REQUIRED);
514
+
515
+ Change |=
516
+ clampStateAndIndicateChange (this ->getState (), CallerInfo.getState ());
517
+
518
+ return true ;
519
+ };
520
+
521
+ bool AllCallSitesKnown = true ;
522
+ if (!A.checkForAllCallSites (CheckCallSite, *this , true , AllCallSitesKnown))
523
+ return indicatePessimisticFixpoint ();
524
+
525
+ return Change;
526
+ }
527
+
528
+ ChangeStatus manifest (Attributor &A) override {
529
+ SmallVector<Attribute, 8 > AttrList;
530
+ Function *F = getAssociatedFunction ();
531
+ LLVMContext &Ctx = F->getContext ();
532
+
533
+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
534
+ unsigned Min, Max;
535
+ std::tie (Min, Max) = InfoCache.getMaximumFlatWorkGroupRange (*F);
536
+
537
+ // Don't add the attribute if it's the implied default.
538
+ if (getAssumed ().getLower () == Min && getAssumed ().getUpper () - 1 == Max)
539
+ return ChangeStatus::UNCHANGED;
540
+
541
+ SmallString<10 > Buffer;
542
+ raw_svector_ostream OS (Buffer);
543
+ OS << getAssumed ().getLower () << ' ,' << getAssumed ().getUpper () - 1 ;
544
+
545
+ AttrList.push_back (
546
+ Attribute::get (Ctx, " amdgpu-flat-work-group-size" , OS.str ()));
547
+ return IRAttributeManifest::manifestAttrs (A, getIRPosition (), AttrList,
548
+ /* ForceReplace */ true );
549
+ }
550
+
551
+ const std::string getAsStr () const override {
552
+ std::string Str;
553
+ raw_string_ostream OS (Str);
554
+ OS << " AMDFlatWorkGroupSize[" ;
555
+ OS << getAssumed ().getLower () << ' ,' << getAssumed ().getUpper () - 1 ;
556
+ OS << ' ]' ;
557
+ return OS.str ();
558
+ }
559
+
560
+ // / See AbstractAttribute::trackStatistics()
561
+ void trackStatistics () const override {}
562
+
563
+ // / Create an abstract attribute view for the position \p IRP.
564
+ static AAAMDFlatWorkGroupSize &createForPosition (const IRPosition &IRP,
565
+ Attributor &A);
566
+
567
+ // / See AbstractAttribute::getName()
568
+ const std::string getName () const override {
569
+ return " AAAMDFlatWorkGroupSize" ;
570
+ }
571
+
572
+ // / See AbstractAttribute::getIdAddr()
573
+ const char *getIdAddr () const override { return &ID; }
574
+
575
+ // / This function should return true if the type of the \p AA is
576
+ // / AAAMDFlatWorkGroupSize
577
+ static bool classof (const AbstractAttribute *AA) {
578
+ return (AA->getIdAddr () == &ID);
579
+ }
580
+
581
+ // / Unique ID (due to the unique address)
582
+ static const char ID;
583
+ };
584
+
585
+ const char AAAMDFlatWorkGroupSize::ID = 0 ;
586
+
587
+ AAAMDFlatWorkGroupSize &
588
+ AAAMDFlatWorkGroupSize::createForPosition (const IRPosition &IRP,
589
+ Attributor &A) {
590
+ if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
591
+ return *new (A.Allocator ) AAAMDFlatWorkGroupSize (IRP, A);
592
+ llvm_unreachable (
593
+ " AAAMDFlatWorkGroupSize is only valid for function position" );
594
+ }
595
+
473
596
class AMDGPUAttributor : public ModulePass {
474
597
public:
475
598
/// Default-construct the attributor module pass (legacy pass manager).
AMDGPUAttributor () : ModulePass(ID) {}
@@ -497,14 +620,18 @@ class AMDGPUAttributor : public ModulePass {
497
620
BumpPtrAllocator Allocator;
498
621
AMDGPUInformationCache InfoCache (M, AG, Allocator, nullptr , *TM);
499
622
DenseSet<const char *> Allowed (
500
- {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AACallEdges::ID});
623
+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
624
+ &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
501
625
502
626
Attributor A (Functions, InfoCache, CGUpdater, &Allowed);
503
627
504
628
for (Function &F : M) {
505
629
if (!F.isIntrinsic ()) {
506
630
A.getOrCreateAAFor <AAAMDAttributes>(IRPosition::function (F));
507
631
A.getOrCreateAAFor <AAUniformWorkGroupSize>(IRPosition::function (F));
632
+ if (!AMDGPU::isEntryFunctionCC (F.getCallingConv ())) {
633
+ A.getOrCreateAAFor <AAAMDFlatWorkGroupSize>(IRPosition::function (F));
634
+ }
508
635
}
509
636
}
510
637
0 commit comments