Skip to content

Commit bc6955f

Browse files
authored
[AMDGPU] Don't fix the scavenge slot at offset 0 (#79136)
At the moment, the emergency spill slot is a fixed object for entry functions and chain functions, and a regular stack object otherwise. This patch adopts the latter behaviour for entry/chain functions too. This seems to have always been the intention [1], and it also saves a bit of stack space in cases where the first stack object has a large alignment. [1] See commit 34c8b83.
1 parent b477d39 commit bc6955f

File tree

67 files changed

+2127
-2109
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2127
-2109
lines changed

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -552,14 +552,10 @@ int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
552552
const SIRegisterInfo &TRI) {
553553
if (ScavengeFI)
554554
return *ScavengeFI;
555-
if (isBottomOfStack()) {
556-
ScavengeFI = MFI.CreateFixedObject(
557-
TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
558-
} else {
559-
ScavengeFI = MFI.CreateStackObject(
560-
TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
561-
TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
562-
}
555+
556+
ScavengeFI =
557+
MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
558+
TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
563559
return *ScavengeFI;
564560
}
565561

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2287,9 +2287,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
22872287
if (FrameReg)
22882288
FIOp.ChangeToRegister(FrameReg, false);
22892289

2290-
if (!Offset)
2291-
return false;
2292-
22932290
MachineOperand *OffsetOp =
22942291
TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
22952292
int64_t NewOffset = Offset + OffsetOp->getImm();

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ define amdgpu_kernel void @kernel_caller_byval() {
6767
; MUBUF-NEXT: s_mov_b32 s3, 0xe00000
6868
; MUBUF-NEXT: s_mov_b64 s[0:1], flat_scratch
6969
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
70+
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0
71+
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
7072
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
7173
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
7274
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:16
@@ -97,25 +99,23 @@ define amdgpu_kernel void @kernel_caller_byval() {
9799
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:116
98100
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:120
99101
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:124
100-
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
101-
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:132
102-
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
102+
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0
103103
; MUBUF-NEXT: s_nop 0
104-
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12
105-
; MUBUF-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:16
106-
; MUBUF-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:20
107-
; MUBUF-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:24
108-
; MUBUF-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:28
109-
; MUBUF-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:32
110-
; MUBUF-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:36
111-
; MUBUF-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:40
112-
; MUBUF-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:44
113-
; MUBUF-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:48
114-
; MUBUF-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:52
115-
; MUBUF-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:56
116-
; MUBUF-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:60
117-
; MUBUF-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:64
118-
; MUBUF-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:68
104+
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
105+
; MUBUF-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:8
106+
; MUBUF-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:12
107+
; MUBUF-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:16
108+
; MUBUF-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:20
109+
; MUBUF-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:24
110+
; MUBUF-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:28
111+
; MUBUF-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:32
112+
; MUBUF-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:36
113+
; MUBUF-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:40
114+
; MUBUF-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:44
115+
; MUBUF-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:48
116+
; MUBUF-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:52
117+
; MUBUF-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:56
118+
; MUBUF-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:60
119119
; MUBUF-NEXT: s_movk_i32 s32, 0x1400
120120
; MUBUF-NEXT: s_getpc_b64 s[4:5]
121121
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
@@ -162,6 +162,7 @@ define amdgpu_kernel void @kernel_caller_byval() {
162162
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
163163
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
164164
; FLATSCR-NEXT: s_mov_b32 s0, 0
165+
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0
165166
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:8
166167
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:16
167168
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:24
@@ -177,16 +178,15 @@ define amdgpu_kernel void @kernel_caller_byval() {
177178
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:104
178179
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:112
179180
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:120
180-
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:128
181-
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:8
181+
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0
182182
; FLATSCR-NEXT: s_nop 0
183-
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:16
184-
; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:24
185-
; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s0 offset:32
186-
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:40
187-
; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:48
188-
; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s0 offset:56
189-
; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s0 offset:64
183+
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:8
184+
; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:16
185+
; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s0 offset:24
186+
; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:32
187+
; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s0 offset:40
188+
; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s0 offset:48
189+
; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s0 offset:56
190190
; FLATSCR-NEXT: s_movk_i32 s32, 0x50
191191
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
192192
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4

llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ define amdgpu_kernel void @stack_write_fi() {
1212
; CHECK-NEXT: s_mov_b32 s5, 0
1313
; CHECK-NEXT: s_mov_b32 s4, 0
1414
; CHECK-NEXT: v_mov_b32_e32 v0, s5
15-
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
15+
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
1616
; CHECK-NEXT: s_waitcnt vmcnt(0)
1717
; CHECK-NEXT: v_mov_b32_e32 v0, s4
18-
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
18+
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
1919
; CHECK-NEXT: s_waitcnt vmcnt(0)
2020
; CHECK-NEXT: s_endpgm
2121
entry:

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define amdgpu_ps void @amdgpu_ps() {
1212
; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4
1313
; MESA-NEXT: s_mov_b64 s[0:1], src_private_base
1414
; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
15-
; MESA-NEXT: v_mov_b32_e32 v0, 4
15+
; MESA-NEXT: v_mov_b32_e32 v0, 0
1616
; MESA-NEXT: v_mov_b32_e32 v1, s1
1717
; MESA-NEXT: v_mov_b32_e32 v2, 0
1818
; MESA-NEXT: flat_store_dword v[0:1], v2
@@ -24,7 +24,7 @@ define amdgpu_ps void @amdgpu_ps() {
2424
; PAL-NEXT: s_getpc_b64 s[2:3]
2525
; PAL-NEXT: s_mov_b32 s2, s0
2626
; PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
27-
; PAL-NEXT: v_mov_b32_e32 v0, 4
27+
; PAL-NEXT: v_mov_b32_e32 v0, 0
2828
; PAL-NEXT: v_mov_b32_e32 v2, 0
2929
; PAL-NEXT: s_waitcnt lgkmcnt(0)
3030
; PAL-NEXT: s_and_b32 s3, s3, 0xffff

0 commit comments

Comments
 (0)