@@ -128,6 +128,17 @@ class AMDGPUInformationCache : public InformationCache {
128
128
return ST.hasApertureRegs ();
129
129
}
130
130
131
+ std::pair<unsigned , unsigned > getFlatWorkGroupSizes (const Function &F) {
132
+ const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
133
+ return ST.getFlatWorkGroupSizes (F);
134
+ }
135
+
136
+ std::pair<unsigned , unsigned >
137
+ getMaximumFlatWorkGroupRange (const Function &F) {
138
+ const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
139
+ return {ST.getMinFlatWorkGroupSize (), ST.getMaxFlatWorkGroupSize ()};
140
+ }
141
+
131
142
private:
132
143
// / Check if the ConstantExpr \p CE requires queue ptr attribute.
133
144
static bool visitConstExpr (const ConstantExpr *CE) {
@@ -470,6 +481,118 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
470
481
llvm_unreachable (" AAAMDAttributes is only valid for function position" );
471
482
}
472
483
484
+ // / Propagate amdgpu-flat-work-group-size attribute.
485
+ struct AAAMDFlatWorkGroupSize
486
+ : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t > {
487
+ using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t >;
488
+ AAAMDFlatWorkGroupSize (const IRPosition &IRP, Attributor &A)
489
+ : Base(IRP, 32 ) {}
490
+
491
+ // / See AbstractAttribute::getState(...).
492
+ IntegerRangeState &getState () override { return *this ; }
493
+ const IntegerRangeState &getState () const override { return *this ; }
494
+
495
+ void initialize (Attributor &A) override {
496
+ Function *F = getAssociatedFunction ();
497
+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
498
+ unsigned MinGroupSize, MaxGroupSize;
499
+ std::tie (MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes (*F);
500
+ intersectKnown (
501
+ ConstantRange (APInt (32 , MinGroupSize), APInt (32 , MaxGroupSize + 1 )));
502
+ }
503
+
504
+ ChangeStatus updateImpl (Attributor &A) override {
505
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
506
+
507
+ auto CheckCallSite = [&](AbstractCallSite CS) {
508
+ Function *Caller = CS.getInstruction ()->getFunction ();
509
+ LLVM_DEBUG (dbgs () << " [AAAMDFlatWorkGroupSize] Call " << Caller->getName ()
510
+ << " ->" << getAssociatedFunction ()->getName () << ' \n ' );
511
+
512
+ const auto &CallerInfo = A.getAAFor <AAAMDFlatWorkGroupSize>(
513
+ *this , IRPosition::function (*Caller), DepClassTy::REQUIRED);
514
+
515
+ Change |=
516
+ clampStateAndIndicateChange (this ->getState (), CallerInfo.getState ());
517
+
518
+ return true ;
519
+ };
520
+
521
+ bool AllCallSitesKnown = true ;
522
+ if (!A.checkForAllCallSites (CheckCallSite, *this , true , AllCallSitesKnown))
523
+ return indicatePessimisticFixpoint ();
524
+
525
+ return Change;
526
+ }
527
+
528
+ ChangeStatus manifest (Attributor &A) override {
529
+ SmallVector<Attribute, 8 > AttrList;
530
+ Function *F = getAssociatedFunction ();
531
+ LLVMContext &Ctx = F->getContext ();
532
+
533
+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
534
+ unsigned Min, Max;
535
+ std::tie (Min, Max) = InfoCache.getMaximumFlatWorkGroupRange (*F);
536
+
537
+ // Don't add the attribute if it's the implied default.
538
+ if (getAssumed ().getLower () == Min && getAssumed ().getUpper () - 1 == Max)
539
+ return ChangeStatus::UNCHANGED;
540
+
541
+ SmallString<10 > Buffer;
542
+ raw_svector_ostream OS (Buffer);
543
+ OS << getAssumed ().getLower () << ' ,' << getAssumed ().getUpper () - 1 ;
544
+
545
+ AttrList.push_back (
546
+ Attribute::get (Ctx, " amdgpu-flat-work-group-size" , OS.str ()));
547
+ return IRAttributeManifest::manifestAttrs (A, getIRPosition (), AttrList,
548
+ /* ForceReplace */ true );
549
+ }
550
+
551
+ const std::string getAsStr () const override {
552
+ std::string Str;
553
+ raw_string_ostream OS (Str);
554
+ OS << " AMDFlatWorkGroupSize[" ;
555
+ OS << getAssumed ().getLower () << ' ,' << getAssumed ().getUpper () - 1 ;
556
+ OS << ' ]' ;
557
+ return OS.str ();
558
+ }
559
+
560
+ // / See AbstractAttribute::trackStatistics()
561
+ void trackStatistics () const override {}
562
+
563
+ // / Create an abstract attribute view for the position \p IRP.
564
+ static AAAMDFlatWorkGroupSize &createForPosition (const IRPosition &IRP,
565
+ Attributor &A);
566
+
567
+ // / See AbstractAttribute::getName()
568
+ const std::string getName () const override {
569
+ return " AAAMDFlatWorkGroupSize" ;
570
+ }
571
+
572
+ // / See AbstractAttribute::getIdAddr()
573
+ const char *getIdAddr () const override { return &ID; }
574
+
575
+ // / This function should return true if the type of the \p AA is
576
+ // / AAAMDFlatWorkGroupSize
577
+ static bool classof (const AbstractAttribute *AA) {
578
+ return (AA->getIdAddr () == &ID);
579
+ }
580
+
581
+ // / Unique ID (due to the unique address)
582
+ static const char ID;
583
+ };
584
+
585
+ const char AAAMDFlatWorkGroupSize::ID = 0 ;
586
+
587
+ AAAMDFlatWorkGroupSize &
588
+ AAAMDFlatWorkGroupSize::createForPosition (const IRPosition &IRP,
589
+ Attributor &A) {
590
+ if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
591
+ return *new (A.Allocator ) AAAMDFlatWorkGroupSize (IRP, A);
592
+ llvm_unreachable (
593
+ " AAAMDFlatWorkGroupSize is only valid for function position" );
594
+ }
595
+
473
596
class AMDGPUAttributor : public ModulePass {
474
597
public:
475
598
/// Default-construct the attributor module pass (legacy pass manager).
AMDGPUAttributor () : ModulePass(ID) {}
@@ -497,14 +620,18 @@ class AMDGPUAttributor : public ModulePass {
497
620
BumpPtrAllocator Allocator;
498
621
AMDGPUInformationCache InfoCache (M, AG, Allocator, nullptr , *TM);
499
622
DenseSet<const char *> Allowed (
500
- {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AACallEdges::ID});
623
+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
624
+ &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
501
625
502
626
Attributor A (Functions, InfoCache, CGUpdater, &Allowed);
503
627
504
628
for (Function &F : M) {
505
629
if (!F.isIntrinsic ()) {
506
630
A.getOrCreateAAFor <AAAMDAttributes>(IRPosition::function (F));
507
631
A.getOrCreateAAFor <AAUniformWorkGroupSize>(IRPosition::function (F));
632
+ if (!AMDGPU::isEntryFunctionCC (F.getCallingConv ())) {
633
+ A.getOrCreateAAFor <AAAMDFlatWorkGroupSize>(IRPosition::function (F));
634
+ }
508
635
}
509
636
}
510
637
0 commit comments