[AMDGPU] Remove CC exception for Promote Alloca Limits

Pierre-vh · Pierre-vh · commit fd1d60873fdc · 2023-04-13T08:48:34.000+02:00
The calling-convention exception, which capped MaxVGPRs at 32 for non-entry functions, was apparently used to work around an issue that has since been fixed. Removing it helps with the high scratch usage observed in some cases due to failed alloca promotion. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D145586
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -124,6 +124,14 @@ class AMDGPUPromoteAllocaToVector : public FunctionPass {
   }
 };
 
+unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
+  if (!TM.getTargetTriple().isAMDGCN())
+    return 128;
+
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  return ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+}
+
 } // end anonymous namespace
 
 char AMDGPUPromoteAlloca::ID = 0;
@@ -176,16 +184,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
   if (!ST.isPromoteAllocaEnabled())
     return false;
 
-  if (IsAMDGCN) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-    MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
-    // A non-entry function has only 32 caller preserved registers.
-    // Do not promote alloca which will force spilling.
-    if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
-      MaxVGPRs = std::min(MaxVGPRs, 32u);
-  } else {
-    MaxVGPRs = 128;
-  }
+  MaxVGPRs = getMaxVGPRs(TM, F);
 
   bool SufficientLDS = hasSufficientLocalMem(F);
   bool Changed = false;
@@ -1200,17 +1199,7 @@ bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
   if (!ST.isPromoteAllocaEnabled())
     return false;
 
-  unsigned MaxVGPRs;
-  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
-    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-    MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
-    // A non-entry function has only 32 caller preserved registers.
-    // Do not promote alloca which will force spilling.
-    if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
-      MaxVGPRs = std::min(MaxVGPRs, 32u);
-  } else {
-    MaxVGPRs = 128;
-  }
+  const unsigned MaxVGPRs = getMaxVGPRs(TM, F);
 
   bool Changed = false;
   BasicBlock &EntryBB = *F.begin();
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
@@ -139,8 +139,8 @@ entry:
 }
 
 ; OPT-LABEL: @func_alloca_9xi64_max256(
-; OPT: alloca
-; OPT-NOT: <9 x i64>
+; OPT-NOT: alloca
+; OPT: <9 x i64>
 ; LIMIT32: alloca
 ; LIMIT32-NOT: <9 x i64>
 define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {