Skip to content

Commit 3604fdf

Browse files
committed
[AMDGPU] Do not assume stack size for PAL code object indirect calls
There is no need to set a big default stack size for PAL code object indirect calls. The driver knows the max recursion depth, so it can compute a more accurate value from the minimum scratch size.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D150609
1 parent c24a39b commit 3604fdf

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,16 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
104104

105105
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
106106
const TargetMachine &TM = TPC->getTM<TargetMachine>();
107+
const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
107108
bool HasIndirectCall = false;
108109

109110
CallGraph CG = CallGraph(M);
110111
auto End = po_end(&CG);
111112

112113
// By default, for code object v5 and later, and for the AMDPAL OS, track only the minimum scratch
113114
// size
114-
if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5) {
115+
if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
116+
STI.getTargetTriple().getOS() == Triple::AMDPAL) {
115117
if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
116118
AssumedStackSizeForDynamicSizeObjects = 0;
117119
if (!AssumedStackSizeForExternalCall.getNumOccurrences())
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
2+
3+
; Check that we do not assume any default stack size for PAL code object
4+
; indirect calls. The driver knows the max recursion depth, so it can compute
5+
; a more accurate value.
6+
7+
; CHECK: ScratchSize: 0
8+
; CHECK: scratch_memory_size: 0
9+
define amdgpu_vs void @test() {
10+
.entry:
11+
%0 = call i64 @llvm.amdgcn.s.getpc()
12+
%1 = inttoptr i64 %0 to ptr
13+
call amdgpu_gfx void %1()
14+
ret void
15+
}
16+
17+
declare i64 @llvm.amdgcn.s.getpc()

0 commit comments

Comments
 (0)