Skip to content

Commit a22e10e

Browse files
committed
Revert "Revert "[AMDGPU] Do not assume stack size for PAL code object indirect calls""
This reverts commit 6878809. Problematic interactions with DebugOverlayEnabled have been fixed under https://github.amd.com/AMD-Radeon-Driver/pal/issues/7695.

Change-Id: Icea9c830470c8d3cbe8f28a93b4f649074f7d1cf
1 parent 743bda0 commit a22e10e

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,14 +104,16 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
104104

105105
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
106106
const TargetMachine &TM = TPC->getTM<TargetMachine>();
107+
const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
107108
bool HasIndirectCall = false;
108109

109110
CallGraph CG = CallGraph(M);
110111
auto End = po_end(&CG);
111112

112113
// By default, for code object v5 and later, or when targeting AMDPAL, track only the minimum scratch
113114
// size
114-
if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5) {
115+
if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
116+
STI.getTargetTriple().getOS() == Triple::AMDPAL) {
115117
if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
116118
AssumedStackSizeForDynamicSizeObjects = 0;
117119
if (!AssumedStackSizeForExternalCall.getNumOccurrences())
Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
2+
3+
; Check that we do not assume any default stack size for PAL code object
4+
; indirect calls. The driver knows the max recursion depth, so it can compute
5+
; a more accurate value.
6+
7+
; CHECK: ScratchSize: 0
8+
; CHECK: scratch_memory_size: 0
9+
define amdgpu_vs void @test() {
10+
.entry:
11+
%0 = call i64 @llvm.amdgcn.s.getpc()
12+
%1 = inttoptr i64 %0 to ptr
13+
call amdgpu_gfx void %1()
14+
ret void
15+
}
16+
17+
declare i64 @llvm.amdgcn.s.getpc()

0 commit comments

Comments
 (0)