Skip to content

Commit 91aac7c

Browse files
authored
AMDGPU: Handle s_add_u32 in eliminateFrameIndex (#129628)
We can fold frame indexes directly into existing immediate operands, just as is already done for s_add_i32. We happen to use s_add_i32 in the 32-bit add case, but s_add_u32 appears in the 64-bit add sequence of a flat pointer if an addrspacecast source is a frame index. This avoids, but does not address, a failure exposed after a316539 where two literal operands end up in the final instruction. The underlying issue still exists for some instructions without special handling in eliminateFrameIndex.
1 parent b41baaf commit 91aac7c

File tree

6 files changed

+179
-64
lines changed

6 files changed

+179
-64
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2713,7 +2713,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
27132713

27142714
return true;
27152715
}
2716-
case AMDGPU::S_ADD_I32: {
2716+
case AMDGPU::S_ADD_I32:
2717+
case AMDGPU::S_ADD_U32: {
27172718
// TODO: Handle s_or_b32, s_and_b32.
27182719
unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
27192720
MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
@@ -2773,7 +2774,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
27732774
DstReg = TmpReg;
27742775
}
27752776

2776-
auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
2777+
auto AddI32 = BuildMI(*MBB, *MI, DL, MI->getDesc())
27772778
.addDef(DstReg, RegState::Renamable)
27782779
.addReg(MaterializedReg, RegState::Kill)
27792780
.add(OtherOp);

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,13 +142,12 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
142142
; GCN-NEXT: v_mov_b32_e32 v0, s48
143143
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:240
144144
; GCN-NEXT: v_mov_b32_e32 v0, s49
145-
; GCN-NEXT: s_and_b32 s4, s25, 63
146145
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:244
147146
; GCN-NEXT: v_mov_b32_e32 v0, s50
148-
; GCN-NEXT: s_lshl_b32 s4, s4, 2
147+
; GCN-NEXT: s_and_b32 s4, s25, 63
149148
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:248
150149
; GCN-NEXT: v_mov_b32_e32 v0, s51
151-
; GCN-NEXT: s_add_u32 s4, 0, s4
150+
; GCN-NEXT: s_lshl_b32 s4, s4, 2
152151
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:252
153152
; GCN-NEXT: v_mov_b32_e32 v0, s24
154153
; GCN-NEXT: v_mov_b32_e32 v1, s4
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
4+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
5+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s
6+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s
7+
8+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s
9+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
10+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s
11+
12+
---
13+
name: s_add_u32__inline_imm__fi_offset0
14+
tracksRegLiveness: true
15+
stack:
16+
- { id: 0, size: 32, alignment: 16 }
17+
machineFunctionInfo:
18+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
19+
frameOffsetReg: '$sgpr33'
20+
stackPtrOffsetReg: '$sgpr32'
21+
body: |
22+
bb.0:
23+
; MUBUFW64-LABEL: name: s_add_u32__inline_imm__fi_offset0
24+
; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
25+
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 12, $sgpr4, implicit-def dead $scc
26+
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
27+
;
28+
; MUBUFW32-LABEL: name: s_add_u32__inline_imm__fi_offset0
29+
; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
30+
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 12, $sgpr4, implicit-def dead $scc
31+
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
32+
;
33+
; FLATSCRW64-LABEL: name: s_add_u32__inline_imm__fi_offset0
34+
; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 12, $sgpr32, implicit-def dead $scc
35+
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
36+
;
37+
; FLATSCRW32-LABEL: name: s_add_u32__inline_imm__fi_offset0
38+
; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 12, $sgpr32, implicit-def dead $scc
39+
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
40+
renamable $sgpr7 = S_ADD_U32 12, %stack.0, implicit-def dead $scc
41+
SI_RETURN implicit $sgpr7
42+
43+
...
44+
45+
---
46+
name: s_add_u32__kernel__literal__fi_offset96__offset_literal
47+
tracksRegLiveness: true
48+
stack:
49+
- { id: 0, size: 96, alignment: 16 }
50+
- { id: 1, size: 128, alignment: 4 }
51+
machineFunctionInfo:
52+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
53+
frameOffsetReg: '$sgpr33'
54+
stackPtrOffsetReg: '$sgpr32'
55+
isEntryFunction: true
56+
body: |
57+
bb.0:
58+
; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
59+
; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
60+
; MUBUFW64-NEXT: {{ $}}
61+
; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
62+
; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
63+
; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164
64+
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
65+
;
66+
; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
67+
; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
68+
; MUBUFW32-NEXT: {{ $}}
69+
; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
70+
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
71+
; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164
72+
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
73+
;
74+
; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
75+
; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164
76+
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
77+
;
78+
; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal
79+
; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164
80+
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
81+
renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def dead $scc
82+
SI_RETURN implicit $sgpr7
83+
...
84+
85+
---
86+
name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
87+
tracksRegLiveness: true
88+
stack:
89+
- { id: 0, size: 96, alignment: 16 }
90+
- { id: 1, size: 128, alignment: 4 }
91+
machineFunctionInfo:
92+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
93+
frameOffsetReg: '$sgpr33'
94+
stackPtrOffsetReg: '$sgpr32'
95+
isEntryFunction: true
96+
body: |
97+
bb.0:
98+
; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
99+
; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
100+
; MUBUFW64-NEXT: {{ $}}
101+
; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
102+
; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
103+
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
104+
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
105+
;
106+
; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
107+
; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
108+
; MUBUFW32-NEXT: {{ $}}
109+
; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
110+
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
111+
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
112+
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
113+
;
114+
; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
115+
; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
116+
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
117+
;
118+
; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc
119+
; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc
120+
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
121+
renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def $scc
122+
SI_RETURN implicit $sgpr7, implicit $scc
123+
...

0 commit comments

Comments
 (0)