Skip to content

Commit 34c8b83

Browse files
committed
AMDGPU: Don't fix emergency stack slot at offset 0
This forced the caller to be aware of this, which is an ugly ABI feature. Partially reverts r295877. The original reasons for doing this are mostly fixed. Alloca is now in a non-0 address space, so it should be OK to have 0 as a valid pointer. Since we treat the absolute address as the pointer value, this part only really needed to apply to kernels. Since r357093, we avoid the need to increment/decrement the offset register in more cases, and since r354816 the scavenger can fail without spilling, so it's less critical that we try to avoid an offset that fits in the MUBUF offset. Restrict to callable functions for now to split this into 2 steps to limit the number of test updates and in case anything breaks. llvm-svn: 362665
1 parent c72fbe5 commit 34c8b83

19 files changed

+496
-442
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -773,22 +773,17 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
773773
!AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
774774
assert(RS && "RegScavenger required if spilling");
775775

776-
// We force this to be at offset 0 so no user object ever has 0 as an
777-
// address, so we may use 0 as an invalid pointer value. This is because
778-
// LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
779-
// is required to be address space 0, we are forced to accept this for
780-
// now. Ideally we could have the stack in another address space with 0 as a
781-
// valid pointer, and -1 as the null value.
782-
//
783-
// This will also waste additional space when user stack objects require > 4
784-
// byte alignment.
785-
//
786-
// The main cost here is losing the offset for addressing modes. However
787-
// this also ensures we shouldn't need a register for the offset when
788-
// emergency scavenging.
789-
int ScavengeFI = MFI.CreateFixedObject(
790-
TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
791-
RS->addScavengingFrameIndex(ScavengeFI);
776+
if (FuncInfo->isEntryFunction()) {
777+
int ScavengeFI = MFI.CreateFixedObject(
778+
TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
779+
RS->addScavengingFrameIndex(ScavengeFI);
780+
} else {
781+
int ScavengeFI = MFI.CreateStackObject(
782+
TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
783+
TRI.getSpillAlignment(AMDGPU::SGPR_32RegClass),
784+
false);
785+
RS->addScavengingFrameIndex(ScavengeFI);
786+
}
792787
}
793788
}
794789

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1940,12 +1940,6 @@ SDValue SITargetLowering::LowerFormalArguments(
19401940
bool IsKernel = AMDGPU::isKernel(CallConv);
19411941
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
19421942

1943-
if (!IsEntryFunc) {
1944-
// 4 bytes are reserved at offset 0 for the emergency stack slot. Skip over
1945-
// this when allocating argument fixed offsets.
1946-
CCInfo.AllocateStack(4, 4);
1947-
}
1948-
19491943
if (IsShader) {
19501944
processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
19511945

@@ -2551,7 +2545,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
25512545
"unsupported call from graphics shader of function ");
25522546
}
25532547

2554-
// The first 4 bytes are reserved for the callee's emergency stack slot.
25552548
if (IsTailCall) {
25562549
IsTailCall = isEligibleForTailCallOptimization(
25572550
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
@@ -2578,9 +2571,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
25782571
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
25792572
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
25802573

2581-
// The first 4 bytes are reserved for the callee's emergency stack slot.
2582-
CCInfo.AllocateStack(4, 4);
2583-
25842574
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
25852575

25862576
// Get a count of how many bytes are to be pushed on the stack.

llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll

Lines changed: 66 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
%struct.ByValStruct = type { [4 x i32] }
55

66
; GCN-LABEL: {{^}}void_func_byval_struct:
7-
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 offset:4{{$}}
7+
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
88
; GCN-NOT: s32
9-
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
9+
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
1010
; GCN-NOT: s32
1111

12-
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:20{{$}}
12+
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
1313
; GCN-NOT: s32
14-
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:20{{$}}
14+
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
1515
; GCN-NOT: s32
1616
define hidden void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
1717
entry:
@@ -34,16 +34,16 @@ entry:
3434
; GCN-DAG: buffer_store_dword v33
3535
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
3636
; GCN-DAG: v_writelane_b32
37-
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
37+
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
3838
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
39-
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
39+
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5{{$}}
4040

41-
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
41+
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
4242
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
4343

4444
; GCN: s_swappc_b64
4545

46-
; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}
46+
; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:16{{$}}
4747

4848
; GCN: v_readlane_b32
4949
; GCN-NOT: v_readlane_b32 s32
@@ -74,31 +74,31 @@ entry:
7474
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
7575
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
7676

77-
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
78-
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
77+
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5{{$}}
78+
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:16
7979

80-
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
81-
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
82-
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
83-
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
80+
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
81+
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:4
82+
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:8
83+
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:12
8484

8585
; GCN-NOT: s_add_u32 s32, s32, 0x800
8686

8787

88-
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
89-
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
90-
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
91-
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
88+
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
89+
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
90+
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
91+
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
9292

93-
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
94-
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
95-
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
96-
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
93+
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:16
94+
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:20
95+
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:24
96+
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:28
9797

98-
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
99-
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
100-
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
101-
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
98+
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
99+
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
100+
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
101+
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
102102

103103
; GCN: s_swappc_b64
104104
; GCN-NOT: v_readlane_b32 s32
@@ -144,20 +144,20 @@ entry:
144144
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
145145
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
146146

147-
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
148-
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
149-
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
150-
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
147+
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
148+
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
149+
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
150+
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
151151

152152
; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
153153
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
154154
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
155155
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
156156

157-
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
158-
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
159-
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
160-
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
157+
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
158+
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
159+
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
160+
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
161161

162162

163163
; GCN: s_swappc_b64
@@ -182,14 +182,14 @@ entry:
182182
}
183183

184184
; GCN-LABEL: {{^}}void_func_byval_struct_align8:
185-
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 offset:8{{$}}
185+
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
186186
; GCN-NOT: s32
187-
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
187+
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
188188
; GCN-NOT: s32
189189

190-
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:24{{$}}
190+
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
191191
; GCN-NOT: s32
192-
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:24{{$}}
192+
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
193193
; GCN-NOT: s32
194194
define hidden void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg1) #1 {
195195
entry:
@@ -222,20 +222,20 @@ entry:
222222
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
223223
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
224224

225-
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
226-
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12
227-
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16
228-
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20
225+
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
226+
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
227+
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
228+
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
229229

230230
; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
231231
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
232232
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
233233
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
234234

235-
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
236-
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
237-
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
238-
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
235+
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
236+
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
237+
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
238+
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
239239

240240

241241
; GCN: s_swappc_b64
@@ -267,30 +267,30 @@ entry:
267267
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
268268
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
269269

270-
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
271-
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
270+
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5{{$}}
271+
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:16
272272

273-
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
274-
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
275-
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
276-
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
273+
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
274+
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:4
275+
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:8
276+
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:12
277277

278278
; GCN-NOT: s_add_u32 s32, s32, 0x800
279279

280-
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
281-
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12
282-
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16
283-
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20
284-
285-
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
286-
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
287-
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
288-
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
289-
290-
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
291-
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
292-
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
293-
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
280+
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
281+
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
282+
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
283+
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
284+
285+
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:16
286+
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:20
287+
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:24
288+
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:28
289+
; GCN: s_waitcnt vmcnt(0)
290+
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
291+
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
292+
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
293+
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
294294

295295
; GCN: s_swappc_b64
296296
; GCN-NOT: v_readlane_b32 s32

0 commit comments

Comments
 (0)