Skip to content

Commit fd1d608

Browse files
committed
[AMDGPU] Remove CC exception for Promote Alloca Limits
Apparently it was used to work around some issue that has been fixed.
Removing it helps with high scratch usage observed in some cases due to failed alloca promotion.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D145586
1 parent d20a1b8 commit fd1d608

File tree

2 files changed

+12
-23
lines changed

2 files changed

+12
-23
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ class AMDGPUPromoteAllocaToVector : public FunctionPass {
124124
}
125125
};
126126

127+
// Computes the VGPR limit for function F; both callers in this diff store the
// result as MaxVGPRs. Returns a fixed 128 when the target triple is not
// AMDGCN; otherwise the value is queried from F's GCNSubtarget.
// No calling-convention check is applied here.
unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
128+
if (!TM.getTargetTriple().isAMDGCN())
129+
return 128;
130+
131+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
132+
// Passes the first element of getWavesPerEU(F) -- presumably the minimum
// waves-per-EU for F; TODO confirm against GCNSubtarget.
return ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
133+
}
134+
127135
} // end anonymous namespace
128136

129137
char AMDGPUPromoteAlloca::ID = 0;
@@ -176,16 +184,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
176184
if (!ST.isPromoteAllocaEnabled())
177185
return false;
178186

179-
if (IsAMDGCN) {
180-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
181-
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
182-
// A non-entry function has only 32 caller preserved registers.
183-
// Do not promote alloca which will force spilling.
184-
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
185-
MaxVGPRs = std::min(MaxVGPRs, 32u);
186-
} else {
187-
MaxVGPRs = 128;
188-
}
187+
MaxVGPRs = getMaxVGPRs(TM, F);
189188

190189
bool SufficientLDS = hasSufficientLocalMem(F);
191190
bool Changed = false;
@@ -1200,17 +1199,7 @@ bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
12001199
if (!ST.isPromoteAllocaEnabled())
12011200
return false;
12021201

1203-
unsigned MaxVGPRs;
1204-
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
1205-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1206-
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
1207-
// A non-entry function has only 32 caller preserved registers.
1208-
// Do not promote alloca which will force spilling.
1209-
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
1210-
MaxVGPRs = std::min(MaxVGPRs, 32u);
1211-
} else {
1212-
MaxVGPRs = 128;
1213-
}
1202+
const unsigned MaxVGPRs = getMaxVGPRs(TM, F);
12141203

12151204
bool Changed = false;
12161205
BasicBlock &EntryBB = *F.begin();

llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ entry:
139139
}
140140

141141
; OPT-LABEL: @func_alloca_9xi64_max256(
142-
; OPT: alloca
143-
; OPT-NOT: <9 x i64>
142+
; OPT-NOT: alloca
143+
; OPT: <9 x i64>
144144
; LIMIT32: alloca
145145
; LIMIT32-NOT: <9 x i64>
146146
define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {

0 commit comments

Comments
 (0)