Skip to content

Commit 7705aeb

Browse files
committed
revert: 11b0401 - [AMDGPU] Restore SP from saved-FP or saved-BP (llvm#124007)
1 parent 0fb6b73 commit 7705aeb

File tree

63 files changed, with 832 additions (+832) and 1316 deletions (-1316).

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed, with 832 additions (+832) and 1316 deletions (-1316).

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1513,18 +1513,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
15131513
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
15141514
bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
15151515

1516-
if (RoundedSize != 0) {
1517-
if (TRI.hasBasePointer(MF)) {
1518-
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1519-
.addReg(TRI.getBaseRegister())
1520-
.setMIFlag(MachineInstr::FrameDestroy);
1521-
} else if (hasFP(MF)) {
1522-
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1523-
.addReg(FramePtrReg)
1524-
.setMIFlag(MachineInstr::FrameDestroy);
1525-
}
1526-
}
1527-
15281516
Register FramePtrRegScratchCopy;
15291517
Register SGPRForFPSaveRestoreCopy =
15301518
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -530,7 +530,8 @@ Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
530530
bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
531531
// When we need stack realignment, we can't reference off of the
532532
// stack pointer, so we reserve a base pointer.
533-
return shouldRealignStack(MF);
533+
const MachineFrameInfo &MFI = MF.getFrameInfo();
534+
return MFI.getNumFixedObjects() && shouldRealignStack(MF);
534535
}
535536

536537
Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }

llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ define ptr addrspace(1) @call_assert_align() {
2727
; CHECK-NEXT: global_store_dword v[0:1], v2, off
2828
; CHECK-NEXT: s_waitcnt vmcnt(0)
2929
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
30-
; CHECK-NEXT: s_mov_b32 s32, s33
3130
; CHECK-NEXT: v_readlane_b32 s4, v40, 2
3231
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
3332
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -247,7 +247,6 @@ define void @func_caller_stack() {
247247
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
248248
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
249249
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
250-
; MUBUF-NEXT: s_mov_b32 s32, s33
251250
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
252251
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
253252
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
@@ -287,7 +286,6 @@ define void @func_caller_stack() {
287286
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
288287
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
289288
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
290-
; FLATSCR-NEXT: s_mov_b32 s32, s33
291289
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
292290
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
293291
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
@@ -374,7 +372,6 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
374372
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
375373
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
376374
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
377-
; MUBUF-NEXT: s_mov_b32 s32, s33
378375
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
379376
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
380377
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
@@ -440,7 +437,6 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
440437
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
441438
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
442439
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
443-
; FLATSCR-NEXT: s_mov_b32 s32, s33
444440
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
445441
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
446442
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload

llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll

Lines changed: 8 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
8585
; GFX9-NEXT: s_and_b32 s4, s4, -16
8686
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
8787
; GFX9-NEXT: s_add_u32 s32, s6, s4
88-
; GFX9-NEXT: s_mov_b32 s32, s33
8988
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
9089
; GFX9-NEXT: s_mov_b32 s33, s7
9190
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -112,7 +111,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112111
; GFX10-NEXT: s_and_b32 s4, s4, -16
113112
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114113
; GFX10-NEXT: s_add_u32 s32, s6, s4
115-
; GFX10-NEXT: s_mov_b32 s32, s33
116114
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
117115
; GFX10-NEXT: s_mov_b32 s33, s7
118116
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -126,9 +124,9 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
126124
; GFX11-NEXT: s_getpc_b64 s[0:1]
127125
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
128126
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
129-
; GFX11-NEXT: s_mov_b32 s2, s32
130-
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
131127
; GFX11-NEXT: v_mov_b32_e32 v0, 0
128+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129+
; GFX11-NEXT: s_mov_b32 s2, s32
132130
; GFX11-NEXT: scratch_store_b32 off, v0, s2
133131
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
134132
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -137,9 +135,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
137135
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
138136
; GFX11-NEXT: s_and_b32 s0, s0, -16
139137
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
140-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
138+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
141139
; GFX11-NEXT: s_add_u32 s32, s2, s0
142-
; GFX11-NEXT: s_mov_b32 s32, s33
143140
; GFX11-NEXT: s_add_i32 s32, s32, -16
144141
; GFX11-NEXT: s_mov_b32 s33, s3
145142
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -229,7 +226,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
229226
; GFX9-NEXT: s_and_b32 s4, s4, -16
230227
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
231228
; GFX9-NEXT: s_add_u32 s32, s6, s4
232-
; GFX9-NEXT: s_mov_b32 s32, s33
233229
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
234230
; GFX9-NEXT: s_mov_b32 s33, s7
235231
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -256,7 +252,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
256252
; GFX10-NEXT: s_and_b32 s4, s4, -16
257253
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
258254
; GFX10-NEXT: s_add_u32 s32, s6, s4
259-
; GFX10-NEXT: s_mov_b32 s32, s33
260255
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
261256
; GFX10-NEXT: s_mov_b32 s33, s7
262257
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -270,9 +265,9 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
270265
; GFX11-NEXT: s_getpc_b64 s[0:1]
271266
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
272267
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
273-
; GFX11-NEXT: s_mov_b32 s2, s32
274-
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
275268
; GFX11-NEXT: v_mov_b32_e32 v0, 0
269+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270+
; GFX11-NEXT: s_mov_b32 s2, s32
276271
; GFX11-NEXT: scratch_store_b32 off, v0, s2
277272
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
278273
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -281,9 +276,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
281276
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
282277
; GFX11-NEXT: s_and_b32 s0, s0, -16
283278
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
284-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
279+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
285280
; GFX11-NEXT: s_add_u32 s32, s2, s0
286-
; GFX11-NEXT: s_mov_b32 s32, s33
287281
; GFX11-NEXT: s_add_i32 s32, s32, -16
288282
; GFX11-NEXT: s_mov_b32 s33, s3
289283
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -361,8 +355,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
361355
; GFX9-NEXT: s_mov_b32 s6, s33
362356
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
363357
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
364-
; GFX9-NEXT: s_mov_b32 s7, s34
365-
; GFX9-NEXT: s_mov_b32 s34, s32
366358
; GFX9-NEXT: s_addk_i32 s32, 0x1000
367359
; GFX9-NEXT: s_getpc_b64 s[4:5]
368360
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -380,8 +372,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
380372
; GFX9-NEXT: s_and_b32 s4, s4, -16
381373
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
382374
; GFX9-NEXT: s_add_u32 s32, s5, s4
383-
; GFX9-NEXT: s_mov_b32 s32, s34
384-
; GFX9-NEXT: s_mov_b32 s34, s7
385375
; GFX9-NEXT: s_addk_i32 s32, 0xf000
386376
; GFX9-NEXT: s_mov_b32 s33, s6
387377
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -392,9 +382,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
392382
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393383
; GFX10-NEXT: s_mov_b32 s6, s33
394384
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
395-
; GFX10-NEXT: s_mov_b32 s7, s34
396385
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
397-
; GFX10-NEXT: s_mov_b32 s34, s32
398386
; GFX10-NEXT: s_addk_i32 s32, 0x800
399387
; GFX10-NEXT: s_getpc_b64 s[4:5]
400388
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -412,8 +400,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
412400
; GFX10-NEXT: s_and_b32 s4, s4, -16
413401
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
414402
; GFX10-NEXT: s_add_u32 s32, s5, s4
415-
; GFX10-NEXT: s_mov_b32 s32, s34
416-
; GFX10-NEXT: s_mov_b32 s34, s7
417403
; GFX10-NEXT: s_addk_i32 s32, 0xf800
418404
; GFX10-NEXT: s_mov_b32 s33, s6
419405
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -423,9 +409,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
423409
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424410
; GFX11-NEXT: s_mov_b32 s2, s33
425411
; GFX11-NEXT: s_add_i32 s33, s32, 31
426-
; GFX11-NEXT: s_mov_b32 s3, s34
412+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
427413
; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
428-
; GFX11-NEXT: s_mov_b32 s34, s32
429414
; GFX11-NEXT: s_add_i32 s32, s32, 64
430415
; GFX11-NEXT: s_getpc_b64 s[0:1]
431416
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
@@ -444,8 +429,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
444429
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
445430
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
446431
; GFX11-NEXT: s_add_u32 s32, s1, s0
447-
; GFX11-NEXT: s_mov_b32 s32, s34
448-
; GFX11-NEXT: s_mov_b32 s34, s3
432+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
449433
; GFX11-NEXT: s_addk_i32 s32, 0xffc0
450434
; GFX11-NEXT: s_mov_b32 s33, s2
451435
; GFX11-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,6 @@ define void @sink_null_insert_pt(ptr addrspace(4) %arg0) {
248248
; GFX9-NEXT: s_swappc_b64 s[30:31], 0
249249
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
250250
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
251-
; GFX9-NEXT: s_mov_b32 s32, s33
252251
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
253252
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
254253
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload

llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,6 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3
178178
; GCN-NEXT: .LBB2_3: ; %bb.2
179179
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
180180
; GCN-NEXT: v_mov_b32_e32 v0, 0
181-
; GCN-NEXT: s_mov_b32 s32, s33
182181
; GCN-NEXT: global_store_dword v[0:1], v0, off
183182
; GCN-NEXT: s_waitcnt vmcnt(0)
184183
; GCN-NEXT: s_addk_i32 s32, 0xfc00
@@ -217,9 +216,7 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i
217216
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218217
; GCN-NEXT: s_mov_b32 s7, s33
219218
; GCN-NEXT: s_add_i32 s33, s32, 0xfc0
220-
; GCN-NEXT: s_mov_b32 s8, s34
221219
; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000
222-
; GCN-NEXT: s_mov_b32 s34, s32
223220
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
224221
; GCN-NEXT: s_addk_i32 s32, 0x2000
225222
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
@@ -243,10 +240,8 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i
243240
; GCN-NEXT: .LBB3_2: ; %bb.1
244241
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
245242
; GCN-NEXT: v_mov_b32_e32 v0, 0
246-
; GCN-NEXT: s_mov_b32 s32, s34
247243
; GCN-NEXT: global_store_dword v[0:1], v0, off
248244
; GCN-NEXT: s_waitcnt vmcnt(0)
249-
; GCN-NEXT: s_mov_b32 s34, s8
250245
; GCN-NEXT: s_addk_i32 s32, 0xe000
251246
; GCN-NEXT: s_mov_b32 s33, s7
252247
; GCN-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ define void @parent_func_missing_inputs() #0 {
3232
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
3333
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
3434
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
35-
; FIXEDABI-NEXT: s_mov_b32 s32, s33
3635
; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 2
3736
; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1
3837
; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload

0 commit comments

Comments
 (0)