Skip to content

Commit 5740044

Browse files
committed
[AMDGPU] Ensure that V_SET_INACTIVE inactive input is WWM computed
The StrictWWM global flag (StateStrictWWM) must be set to ensure that the inactive-lane input of V_SET_INACTIVE is computed in WWM. If the global flag is not present, the full lowering may be skipped.
1 parent 3698453 commit 5740044

File tree

3 files changed

+217
-100
lines changed

3 files changed

+217
-100
lines changed

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
564564
}
565565
}
566566
SetInactiveInstrs.push_back(&MI);
567+
GlobalFlags |= StateStrictWWM;
567568
} else if (TII->isDisableWQM(MI)) {
568569
BBI.Needs |= StateExact;
569570
if (!(BBI.InNeeds & StateExact)) {

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll

Lines changed: 72 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,16 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
9696
define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
9797
; GCN-LABEL: set_inactive_f32:
9898
; GCN: ; %bb.0:
99-
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
99+
; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
100100
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
101-
; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
101+
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
102+
; GCN-NEXT: v_mov_b32_e32 v0, 0x40400000
103+
; GCN-NEXT: s_mov_b64 exec, s[2:3]
102104
; GCN-NEXT: s_mov_b32 s2, -1
103105
; GCN-NEXT: s_waitcnt lgkmcnt(0)
104-
; GCN-NEXT: v_mov_b32_e32 v0, s3
106+
; GCN-NEXT: v_mov_b32_e32 v0, s4
105107
; GCN-NEXT: s_not_b64 exec, exec
106-
; GCN-NEXT: v_mov_b32_e32 v0, v1
108+
; GCN-NEXT: v_mov_b32_e32 v0, v0
107109
; GCN-NEXT: s_not_b64 exec, exec
108110
; GCN-NEXT: s_mov_b32 s3, 0xf000
109111
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -117,16 +119,18 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
117119
; GCN-LABEL: set_inactive_f64:
118120
; GCN: ; %bb.0:
119121
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
120-
; GCN-NEXT: s_mov_b32 s4, 0xcccccccd
121-
; GCN-NEXT: s_mov_b32 s5, 0x4010cccc
122-
; GCN-NEXT: v_mov_b32_e32 v2, s4
123-
; GCN-NEXT: v_mov_b32_e32 v3, s5
122+
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
123+
; GCN-NEXT: s_mov_b32 s6, 0xcccccccd
124+
; GCN-NEXT: s_mov_b32 s7, 0x4010cccc
125+
; GCN-NEXT: v_mov_b32_e32 v0, s6
126+
; GCN-NEXT: v_mov_b32_e32 v1, s7
127+
; GCN-NEXT: s_mov_b64 exec, s[4:5]
124128
; GCN-NEXT: s_waitcnt lgkmcnt(0)
125129
; GCN-NEXT: v_mov_b32_e32 v0, s2
126130
; GCN-NEXT: v_mov_b32_e32 v1, s3
127131
; GCN-NEXT: s_not_b64 exec, exec
128-
; GCN-NEXT: v_mov_b32_e32 v0, v2
129-
; GCN-NEXT: v_mov_b32_e32 v1, v3
132+
; GCN-NEXT: v_mov_b32_e32 v0, v0
133+
; GCN-NEXT: v_mov_b32_e32 v1, v1
130134
; GCN-NEXT: s_not_b64 exec, exec
131135
; GCN-NEXT: s_mov_b32 s2, -1
132136
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -140,14 +144,16 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
140144
define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) {
141145
; GCN-LABEL: set_inactive_v2i16:
142146
; GCN: ; %bb.0:
143-
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
147+
; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
144148
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
145-
; GCN-NEXT: v_mov_b32_e32 v1, 0x10001
149+
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
150+
; GCN-NEXT: v_mov_b32_e32 v0, 0x10001
151+
; GCN-NEXT: s_mov_b64 exec, s[2:3]
146152
; GCN-NEXT: s_mov_b32 s2, -1
147153
; GCN-NEXT: s_waitcnt lgkmcnt(0)
148-
; GCN-NEXT: v_mov_b32_e32 v0, s3
154+
; GCN-NEXT: v_mov_b32_e32 v0, s4
149155
; GCN-NEXT: s_not_b64 exec, exec
150-
; GCN-NEXT: v_mov_b32_e32 v0, v1
156+
; GCN-NEXT: v_mov_b32_e32 v0, v0
151157
; GCN-NEXT: s_not_b64 exec, exec
152158
; GCN-NEXT: s_mov_b32 s3, 0xf000
153159
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -160,14 +166,16 @@ define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %
160166
define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) {
161167
; GCN-LABEL: set_inactive_v2f16:
162168
; GCN: ; %bb.0:
163-
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
169+
; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
164170
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
165-
; GCN-NEXT: v_mov_b32_e32 v1, 0x3c003c00
171+
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
172+
; GCN-NEXT: v_mov_b32_e32 v0, 0x3c003c00
173+
; GCN-NEXT: s_mov_b64 exec, s[2:3]
166174
; GCN-NEXT: s_mov_b32 s2, -1
167175
; GCN-NEXT: s_waitcnt lgkmcnt(0)
168-
; GCN-NEXT: v_mov_b32_e32 v0, s3
176+
; GCN-NEXT: v_mov_b32_e32 v0, s4
169177
; GCN-NEXT: s_not_b64 exec, exec
170-
; GCN-NEXT: v_mov_b32_e32 v0, v1
178+
; GCN-NEXT: v_mov_b32_e32 v0, v0
171179
; GCN-NEXT: s_not_b64 exec, exec
172180
; GCN-NEXT: s_mov_b32 s3, 0xf000
173181
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -181,16 +189,18 @@ define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %
181189
; GCN-LABEL: set_inactive_v2i32:
182190
; GCN: ; %bb.0:
183191
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
184-
; GCN-NEXT: s_mov_b32 s4, 1
185-
; GCN-NEXT: s_mov_b32 s5, s4
186-
; GCN-NEXT: v_mov_b32_e32 v2, s4
187-
; GCN-NEXT: v_mov_b32_e32 v3, s5
192+
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
193+
; GCN-NEXT: s_mov_b32 s6, 1
194+
; GCN-NEXT: s_mov_b32 s7, s6
195+
; GCN-NEXT: v_mov_b32_e32 v0, s6
196+
; GCN-NEXT: v_mov_b32_e32 v1, s7
197+
; GCN-NEXT: s_mov_b64 exec, s[4:5]
188198
; GCN-NEXT: s_waitcnt lgkmcnt(0)
189199
; GCN-NEXT: v_mov_b32_e32 v0, s2
190200
; GCN-NEXT: v_mov_b32_e32 v1, s3
191201
; GCN-NEXT: s_not_b64 exec, exec
192-
; GCN-NEXT: v_mov_b32_e32 v0, v2
193-
; GCN-NEXT: v_mov_b32_e32 v1, v3
202+
; GCN-NEXT: v_mov_b32_e32 v0, v0
203+
; GCN-NEXT: v_mov_b32_e32 v1, v1
194204
; GCN-NEXT: s_not_b64 exec, exec
195205
; GCN-NEXT: s_mov_b32 s2, -1
196206
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -205,16 +215,18 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
205215
; GCN-LABEL: set_inactive_v2f32:
206216
; GCN: ; %bb.0:
207217
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
208-
; GCN-NEXT: s_mov_b32 s4, 1.0
209-
; GCN-NEXT: s_mov_b32 s5, s4
210-
; GCN-NEXT: v_mov_b32_e32 v2, s4
211-
; GCN-NEXT: v_mov_b32_e32 v3, s5
218+
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
219+
; GCN-NEXT: s_mov_b32 s6, 1.0
220+
; GCN-NEXT: s_mov_b32 s7, s6
221+
; GCN-NEXT: v_mov_b32_e32 v0, s6
222+
; GCN-NEXT: v_mov_b32_e32 v1, s7
223+
; GCN-NEXT: s_mov_b64 exec, s[4:5]
212224
; GCN-NEXT: s_waitcnt lgkmcnt(0)
213225
; GCN-NEXT: v_mov_b32_e32 v0, s2
214226
; GCN-NEXT: v_mov_b32_e32 v1, s3
215227
; GCN-NEXT: s_not_b64 exec, exec
216-
; GCN-NEXT: v_mov_b32_e32 v0, v2
217-
; GCN-NEXT: v_mov_b32_e32 v1, v3
228+
; GCN-NEXT: v_mov_b32_e32 v0, v0
229+
; GCN-NEXT: v_mov_b32_e32 v1, v1
218230
; GCN-NEXT: s_not_b64 exec, exec
219231
; GCN-NEXT: s_mov_b32 s2, -1
220232
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -228,14 +240,16 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
228240
define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in) {
229241
; GCN-LABEL: set_inactive_v2bf16:
230242
; GCN: ; %bb.0:
231-
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
243+
; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
232244
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
233-
; GCN-NEXT: v_mov_b32_e32 v1, 0x3f803f80
245+
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
246+
; GCN-NEXT: v_mov_b32_e32 v0, 0x3f803f80
247+
; GCN-NEXT: s_mov_b64 exec, s[2:3]
234248
; GCN-NEXT: s_mov_b32 s2, -1
235249
; GCN-NEXT: s_waitcnt lgkmcnt(0)
236-
; GCN-NEXT: v_mov_b32_e32 v0, s3
250+
; GCN-NEXT: v_mov_b32_e32 v0, s4
237251
; GCN-NEXT: s_not_b64 exec, exec
238-
; GCN-NEXT: v_mov_b32_e32 v0, v1
252+
; GCN-NEXT: v_mov_b32_e32 v0, v0
239253
; GCN-NEXT: s_not_b64 exec, exec
240254
; GCN-NEXT: s_mov_b32 s3, 0xf000
241255
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -249,16 +263,18 @@ define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %
249263
; GCN-LABEL: set_inactive_v4i16:
250264
; GCN: ; %bb.0:
251265
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
252-
; GCN-NEXT: s_mov_b32 s4, 0x10001
253-
; GCN-NEXT: s_mov_b32 s5, s4
254-
; GCN-NEXT: v_mov_b32_e32 v2, s4
255-
; GCN-NEXT: v_mov_b32_e32 v3, s5
266+
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
267+
; GCN-NEXT: s_mov_b32 s6, 0x10001
268+
; GCN-NEXT: s_mov_b32 s7, s6
269+
; GCN-NEXT: v_mov_b32_e32 v0, s6
270+
; GCN-NEXT: v_mov_b32_e32 v1, s7
271+
; GCN-NEXT: s_mov_b64 exec, s[4:5]
256272
; GCN-NEXT: s_waitcnt lgkmcnt(0)
257273
; GCN-NEXT: v_mov_b32_e32 v0, s2
258274
; GCN-NEXT: v_mov_b32_e32 v1, s3
259275
; GCN-NEXT: s_not_b64 exec, exec
260-
; GCN-NEXT: v_mov_b32_e32 v0, v2
261-
; GCN-NEXT: v_mov_b32_e32 v1, v3
276+
; GCN-NEXT: v_mov_b32_e32 v0, v0
277+
; GCN-NEXT: v_mov_b32_e32 v1, v1
262278
; GCN-NEXT: s_not_b64 exec, exec
263279
; GCN-NEXT: s_mov_b32 s2, -1
264280
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -273,16 +289,18 @@ define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half>
273289
; GCN-LABEL: set_inactive_v4f16:
274290
; GCN: ; %bb.0:
275291
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
276-
; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
277-
; GCN-NEXT: s_mov_b32 s5, s4
278-
; GCN-NEXT: v_mov_b32_e32 v2, s4
279-
; GCN-NEXT: v_mov_b32_e32 v3, s5
292+
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
293+
; GCN-NEXT: s_mov_b32 s6, 0x3c003c00
294+
; GCN-NEXT: s_mov_b32 s7, s6
295+
; GCN-NEXT: v_mov_b32_e32 v0, s6
296+
; GCN-NEXT: v_mov_b32_e32 v1, s7
297+
; GCN-NEXT: s_mov_b64 exec, s[4:5]
280298
; GCN-NEXT: s_waitcnt lgkmcnt(0)
281299
; GCN-NEXT: v_mov_b32_e32 v0, s2
282300
; GCN-NEXT: v_mov_b32_e32 v1, s3
283301
; GCN-NEXT: s_not_b64 exec, exec
284-
; GCN-NEXT: v_mov_b32_e32 v0, v2
285-
; GCN-NEXT: v_mov_b32_e32 v1, v3
302+
; GCN-NEXT: v_mov_b32_e32 v0, v0
303+
; GCN-NEXT: v_mov_b32_e32 v1, v1
286304
; GCN-NEXT: s_not_b64 exec, exec
287305
; GCN-NEXT: s_mov_b32 s2, -1
288306
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -297,16 +315,18 @@ define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloa
297315
; GCN-LABEL: set_inactive_v4bf16:
298316
; GCN: ; %bb.0:
299317
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
300-
; GCN-NEXT: s_mov_b32 s4, 0x3f803f80
301-
; GCN-NEXT: s_mov_b32 s5, s4
302-
; GCN-NEXT: v_mov_b32_e32 v2, s4
303-
; GCN-NEXT: v_mov_b32_e32 v3, s5
318+
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
319+
; GCN-NEXT: s_mov_b32 s6, 0x3f803f80
320+
; GCN-NEXT: s_mov_b32 s7, s6
321+
; GCN-NEXT: v_mov_b32_e32 v0, s6
322+
; GCN-NEXT: v_mov_b32_e32 v1, s7
323+
; GCN-NEXT: s_mov_b64 exec, s[4:5]
304324
; GCN-NEXT: s_waitcnt lgkmcnt(0)
305325
; GCN-NEXT: v_mov_b32_e32 v0, s2
306326
; GCN-NEXT: v_mov_b32_e32 v1, s3
307327
; GCN-NEXT: s_not_b64 exec, exec
308-
; GCN-NEXT: v_mov_b32_e32 v0, v2
309-
; GCN-NEXT: v_mov_b32_e32 v1, v3
328+
; GCN-NEXT: v_mov_b32_e32 v0, v0
329+
; GCN-NEXT: v_mov_b32_e32 v1, v1
310330
; GCN-NEXT: s_not_b64 exec, exec
311331
; GCN-NEXT: s_mov_b32 s2, -1
312332
; GCN-NEXT: s_mov_b32 s3, 0xf000

0 commit comments

Comments
 (0)