Skip to content

Commit a371ccc

Browse files
committed
Revert "AMDGPU: Handle folding frame indexes into s_add_i32 (llvm#101694)"
This reverts commit 8039886. Change-Id: I7c75bacdc5174f56f6c2ac7bcbbd4c25be824a32
1 parent 5306696 commit a371ccc

File tree

7 files changed

+247
-981
lines changed

7 files changed

+247
-981
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -2587,94 +2587,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
25872587
MI->eraseFromParent();
25882588
return true;
25892589
}
2590-
case AMDGPU::S_ADD_I32: {
2591-
// TODO: Handle s_or_b32, s_and_b32.
2592-
unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
2593-
MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
25942590

2595-
assert(FrameReg || MFI->isBottomOfStack());
2596-
2597-
MachineOperand &DstOp = MI->getOperand(0);
2598-
const DebugLoc &DL = MI->getDebugLoc();
2599-
Register MaterializedReg = FrameReg;
2600-
2601-
// Defend against live scc, which should never happen in practice.
2602-
bool DeadSCC = MI->getOperand(3).isDead();
2603-
2604-
Register TmpReg;
2605-
2606-
if (FrameReg && !ST.enableFlatScratch()) {
2607-
// FIXME: In the common case where the add does not also read its result
2608-
// (i.e. this isn't a reg += fi), it's not finding the dest reg as
2609-
// available.
2610-
TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
2611-
false, 0);
2612-
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
2613-
.addDef(TmpReg, RegState::Renamable)
2614-
.addReg(FrameReg)
2615-
.addImm(ST.getWavefrontSizeLog2())
2616-
.setOperandDead(3); // Set SCC dead
2617-
MaterializedReg = TmpReg;
2618-
}
2619-
2620-
int64_t Offset = FrameInfo.getObjectOffset(Index);
2621-
2622-
// For the non-immediate case, we could fall through to the default
2623-
// handling, but we do an in-place update of the result register here to
2624-
// avoid scavenging another register.
2625-
if (OtherOp.isImm()) {
2626-
OtherOp.setImm(OtherOp.getImm() + Offset);
2627-
Offset = 0;
2628-
2629-
if (MaterializedReg)
2630-
FIOp.ChangeToRegister(MaterializedReg, false);
2631-
else
2632-
FIOp.ChangeToImmediate(0);
2633-
} else if (MaterializedReg) {
2634-
// If we can't fold the other operand, do another increment.
2635-
Register DstReg = DstOp.getReg();
2636-
2637-
if (!TmpReg && MaterializedReg == FrameReg) {
2638-
TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2639-
MI, false, 0);
2640-
DstReg = TmpReg;
2641-
}
2642-
2643-
auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
2644-
.addDef(DstReg, RegState::Renamable)
2645-
.addReg(MaterializedReg, RegState::Kill)
2646-
.add(OtherOp);
2647-
if (DeadSCC)
2648-
AddI32.setOperandDead(3);
2649-
2650-
MaterializedReg = DstReg;
2651-
2652-
OtherOp.ChangeToRegister(MaterializedReg, false);
2653-
OtherOp.setIsKill(true);
2654-
OtherOp.setIsRenamable(true);
2655-
FIOp.ChangeToImmediate(Offset);
2656-
} else {
2657-
// If we don't have any other offset to apply, we can just directly
2658-
// interpret the frame index as the offset.
2659-
FIOp.ChangeToImmediate(Offset);
2660-
}
2661-
2662-
if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
2663-
assert(Offset == 0);
2664-
MI->removeOperand(3);
2665-
MI->removeOperand(OtherOpIdx);
2666-
MI->setDesc(TII->get(FIOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2667-
} else if (DeadSCC && FIOp.isImm() && FIOp.getImm() == 0) {
2668-
assert(Offset == 0);
2669-
MI->removeOperand(3);
2670-
MI->removeOperand(FIOperandNum);
2671-
MI->setDesc(
2672-
TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2673-
}
2674-
2675-
assert(!FIOp.isFI());
2676-
return true;
2677-
}
26782591
default: {
26792592
// Other access to frame index
26802593
const DebugLoc &DL = MI->getDebugLoc();

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
1515
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1616
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
1717
; GFX9-NEXT: s_and_b32 s0, s0, 15
18+
; GFX9-NEXT: s_add_i32 s1, s1, 0
1819
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
1920
; GFX9-NEXT: scratch_store_dword off, v0, s1
2021
; GFX9-NEXT: s_waitcnt vmcnt(0)
22+
; GFX9-NEXT: s_add_i32 s0, s0, 0
2123
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
2224
; GFX9-NEXT: s_waitcnt vmcnt(0)
2325
; GFX9-NEXT: s_endpgm
@@ -34,6 +36,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
3436
; GFX10-NEXT: s_and_b32 s1, s0, 15
3537
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
3638
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
39+
; GFX10-NEXT: s_add_i32 s0, s0, 0
40+
; GFX10-NEXT: s_add_i32 s1, s1, 0
3741
; GFX10-NEXT: scratch_store_dword off, v0, s0
3842
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3943
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -47,9 +51,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
4751
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
4852
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
4953
; GFX940-NEXT: s_and_b32 s0, s0, 15
54+
; GFX940-NEXT: s_add_i32 s1, s1, 0
5055
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
5156
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
5257
; GFX940-NEXT: s_waitcnt vmcnt(0)
58+
; GFX940-NEXT: s_add_i32 s0, s0, 0
5359
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
5460
; GFX940-NEXT: s_waitcnt vmcnt(0)
5561
; GFX940-NEXT: s_endpgm
@@ -62,6 +68,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
6268
; GFX11-NEXT: s_and_b32 s1, s0, 15
6369
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
6470
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
71+
; GFX11-NEXT: s_add_i32 s0, s0, 0
72+
; GFX11-NEXT: s_add_i32 s1, s1, 0
6573
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
6674
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
6775
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -76,6 +84,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
7684
; GFX12-NEXT: s_and_b32 s1, s0, 15
7785
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
7886
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
87+
; GFX12-NEXT: s_add_co_i32 s0, s0, 0
88+
; GFX12-NEXT: s_add_co_i32 s1, s1, 0
7989
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
8090
; GFX12-NEXT: s_wait_storecnt 0x0
8191
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -1032,13 +1042,13 @@ define void @store_load_large_imm_offset_foo() {
10321042
; GFX9-LABEL: store_load_large_imm_offset_foo:
10331043
; GFX9: ; %bb.0: ; %bb
10341044
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035-
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
10361045
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1037-
; GFX9-NEXT: s_add_i32 s1, s32, s0
1046+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1047+
; GFX9-NEXT: s_add_i32 s1, s32, 4
10381048
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
10391049
; GFX9-NEXT: s_waitcnt vmcnt(0)
10401050
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1041-
; GFX9-NEXT: s_add_i32 s0, s1, 4
1051+
; GFX9-NEXT: s_add_i32 s0, s0, s1
10421052
; GFX9-NEXT: scratch_store_dword off, v0, s0
10431053
; GFX9-NEXT: s_waitcnt vmcnt(0)
10441054
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1049,10 +1059,10 @@ define void @store_load_large_imm_offset_foo() {
10491059
; GFX10: ; %bb.0: ; %bb
10501060
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10511061
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1052-
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
10531062
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1054-
; GFX10-NEXT: s_add_i32 s1, s32, s0
1055-
; GFX10-NEXT: s_add_i32 s0, s1, 4
1063+
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1064+
; GFX10-NEXT: s_add_i32 s1, s32, 4
1065+
; GFX10-NEXT: s_add_i32 s0, s0, s1
10561066
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
10571067
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
10581068
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1064,13 +1074,13 @@ define void @store_load_large_imm_offset_foo() {
10641074
; GFX940-LABEL: store_load_large_imm_offset_foo:
10651075
; GFX940: ; %bb.0: ; %bb
10661076
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1067-
; GFX940-NEXT: s_movk_i32 s0, 0x3e80
10681077
; GFX940-NEXT: v_mov_b32_e32 v0, 13
1069-
; GFX940-NEXT: s_add_i32 s1, s32, s0
1078+
; GFX940-NEXT: s_movk_i32 s0, 0x3e80
1079+
; GFX940-NEXT: s_add_i32 s1, s32, 4
10701080
; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
10711081
; GFX940-NEXT: s_waitcnt vmcnt(0)
10721082
; GFX940-NEXT: v_mov_b32_e32 v0, 15
1073-
; GFX940-NEXT: s_add_i32 s0, s1, 4
1083+
; GFX940-NEXT: s_add_i32 s0, s0, s1
10741084
; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
10751085
; GFX940-NEXT: s_waitcnt vmcnt(0)
10761086
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1082,9 +1092,9 @@ define void @store_load_large_imm_offset_foo() {
10821092
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10831093
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
10841094
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1085-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1086-
; GFX11-NEXT: s_add_i32 s1, s32, s0
1087-
; GFX11-NEXT: s_add_i32 s0, s1, 4
1095+
; GFX11-NEXT: s_add_i32 s1, s32, 4
1096+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1097+
; GFX11-NEXT: s_add_i32 s0, s0, s1
10881098
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
10891099
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
10901100
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc

0 commit comments

Comments
 (0)