@@ -96,14 +96,16 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
96
96
define amdgpu_kernel void @set_inactive_f32 (ptr addrspace (1 ) %out , float %in ) {
97
97
; GCN-LABEL: set_inactive_f32:
98
98
; GCN: ; %bb.0:
99
- ; GCN-NEXT: s_load_dword s3 , s[0:1], 0x2c
99
+ ; GCN-NEXT: s_load_dword s4 , s[0:1], 0x2c
100
100
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
101
- ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
101
+ ; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
102
+ ; GCN-NEXT: v_mov_b32_e32 v0, 0x40400000
103
+ ; GCN-NEXT: s_mov_b64 exec, s[2:3]
102
104
; GCN-NEXT: s_mov_b32 s2, -1
103
105
; GCN-NEXT: s_waitcnt lgkmcnt(0)
104
- ; GCN-NEXT: v_mov_b32_e32 v0, s3
106
+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
105
107
; GCN-NEXT: s_not_b64 exec, exec
106
- ; GCN-NEXT: v_mov_b32_e32 v0, v1
108
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
107
109
; GCN-NEXT: s_not_b64 exec, exec
108
110
; GCN-NEXT: s_mov_b32 s3, 0xf000
109
111
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -117,16 +119,18 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
117
119
; GCN-LABEL: set_inactive_f64:
118
120
; GCN: ; %bb.0:
119
121
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
120
- ; GCN-NEXT: s_mov_b32 s4, 0xcccccccd
121
- ; GCN-NEXT: s_mov_b32 s5, 0x4010cccc
122
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
123
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
122
+ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
123
+ ; GCN-NEXT: s_mov_b32 s6, 0xcccccccd
124
+ ; GCN-NEXT: s_mov_b32 s7, 0x4010cccc
125
+ ; GCN-NEXT: v_mov_b32_e32 v0, s6
126
+ ; GCN-NEXT: v_mov_b32_e32 v1, s7
127
+ ; GCN-NEXT: s_mov_b64 exec, s[4:5]
124
128
; GCN-NEXT: s_waitcnt lgkmcnt(0)
125
129
; GCN-NEXT: v_mov_b32_e32 v0, s2
126
130
; GCN-NEXT: v_mov_b32_e32 v1, s3
127
131
; GCN-NEXT: s_not_b64 exec, exec
128
- ; GCN-NEXT: v_mov_b32_e32 v0, v2
129
- ; GCN-NEXT: v_mov_b32_e32 v1, v3
132
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
133
+ ; GCN-NEXT: v_mov_b32_e32 v1, v1
130
134
; GCN-NEXT: s_not_b64 exec, exec
131
135
; GCN-NEXT: s_mov_b32 s2, -1
132
136
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -140,14 +144,16 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
140
144
define amdgpu_kernel void @set_inactive_v2i16 (ptr addrspace (1 ) %out , <2 x i16 > %in ) {
141
145
; GCN-LABEL: set_inactive_v2i16:
142
146
; GCN: ; %bb.0:
143
- ; GCN-NEXT: s_load_dword s3 , s[0:1], 0x2c
147
+ ; GCN-NEXT: s_load_dword s4 , s[0:1], 0x2c
144
148
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
145
- ; GCN-NEXT: v_mov_b32_e32 v1, 0x10001
149
+ ; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
150
+ ; GCN-NEXT: v_mov_b32_e32 v0, 0x10001
151
+ ; GCN-NEXT: s_mov_b64 exec, s[2:3]
146
152
; GCN-NEXT: s_mov_b32 s2, -1
147
153
; GCN-NEXT: s_waitcnt lgkmcnt(0)
148
- ; GCN-NEXT: v_mov_b32_e32 v0, s3
154
+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
149
155
; GCN-NEXT: s_not_b64 exec, exec
150
- ; GCN-NEXT: v_mov_b32_e32 v0, v1
156
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
151
157
; GCN-NEXT: s_not_b64 exec, exec
152
158
; GCN-NEXT: s_mov_b32 s3, 0xf000
153
159
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -160,14 +166,16 @@ define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %
160
166
define amdgpu_kernel void @set_inactive_v2f16 (ptr addrspace (1 ) %out , <2 x half > %in ) {
161
167
; GCN-LABEL: set_inactive_v2f16:
162
168
; GCN: ; %bb.0:
163
- ; GCN-NEXT: s_load_dword s3 , s[0:1], 0x2c
169
+ ; GCN-NEXT: s_load_dword s4 , s[0:1], 0x2c
164
170
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
165
- ; GCN-NEXT: v_mov_b32_e32 v1, 0x3c003c00
171
+ ; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
172
+ ; GCN-NEXT: v_mov_b32_e32 v0, 0x3c003c00
173
+ ; GCN-NEXT: s_mov_b64 exec, s[2:3]
166
174
; GCN-NEXT: s_mov_b32 s2, -1
167
175
; GCN-NEXT: s_waitcnt lgkmcnt(0)
168
- ; GCN-NEXT: v_mov_b32_e32 v0, s3
176
+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
169
177
; GCN-NEXT: s_not_b64 exec, exec
170
- ; GCN-NEXT: v_mov_b32_e32 v0, v1
178
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
171
179
; GCN-NEXT: s_not_b64 exec, exec
172
180
; GCN-NEXT: s_mov_b32 s3, 0xf000
173
181
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -181,16 +189,18 @@ define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %
181
189
; GCN-LABEL: set_inactive_v2i32:
182
190
; GCN: ; %bb.0:
183
191
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
184
- ; GCN-NEXT: s_mov_b32 s4, 1
185
- ; GCN-NEXT: s_mov_b32 s5, s4
186
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
187
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
192
+ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
193
+ ; GCN-NEXT: s_mov_b32 s6, 1
194
+ ; GCN-NEXT: s_mov_b32 s7, s6
195
+ ; GCN-NEXT: v_mov_b32_e32 v0, s6
196
+ ; GCN-NEXT: v_mov_b32_e32 v1, s7
197
+ ; GCN-NEXT: s_mov_b64 exec, s[4:5]
188
198
; GCN-NEXT: s_waitcnt lgkmcnt(0)
189
199
; GCN-NEXT: v_mov_b32_e32 v0, s2
190
200
; GCN-NEXT: v_mov_b32_e32 v1, s3
191
201
; GCN-NEXT: s_not_b64 exec, exec
192
- ; GCN-NEXT: v_mov_b32_e32 v0, v2
193
- ; GCN-NEXT: v_mov_b32_e32 v1, v3
202
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
203
+ ; GCN-NEXT: v_mov_b32_e32 v1, v1
194
204
; GCN-NEXT: s_not_b64 exec, exec
195
205
; GCN-NEXT: s_mov_b32 s2, -1
196
206
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -205,16 +215,18 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
205
215
; GCN-LABEL: set_inactive_v2f32:
206
216
; GCN: ; %bb.0:
207
217
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
208
- ; GCN-NEXT: s_mov_b32 s4, 1.0
209
- ; GCN-NEXT: s_mov_b32 s5, s4
210
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
211
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
218
+ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
219
+ ; GCN-NEXT: s_mov_b32 s6, 1.0
220
+ ; GCN-NEXT: s_mov_b32 s7, s6
221
+ ; GCN-NEXT: v_mov_b32_e32 v0, s6
222
+ ; GCN-NEXT: v_mov_b32_e32 v1, s7
223
+ ; GCN-NEXT: s_mov_b64 exec, s[4:5]
212
224
; GCN-NEXT: s_waitcnt lgkmcnt(0)
213
225
; GCN-NEXT: v_mov_b32_e32 v0, s2
214
226
; GCN-NEXT: v_mov_b32_e32 v1, s3
215
227
; GCN-NEXT: s_not_b64 exec, exec
216
- ; GCN-NEXT: v_mov_b32_e32 v0, v2
217
- ; GCN-NEXT: v_mov_b32_e32 v1, v3
228
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
229
+ ; GCN-NEXT: v_mov_b32_e32 v1, v1
218
230
; GCN-NEXT: s_not_b64 exec, exec
219
231
; GCN-NEXT: s_mov_b32 s2, -1
220
232
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -228,14 +240,16 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
228
240
define amdgpu_kernel void @set_inactive_v2bf16 (ptr addrspace (1 ) %out , <2 x bfloat> %in ) {
229
241
; GCN-LABEL: set_inactive_v2bf16:
230
242
; GCN: ; %bb.0:
231
- ; GCN-NEXT: s_load_dword s3 , s[0:1], 0x2c
243
+ ; GCN-NEXT: s_load_dword s4 , s[0:1], 0x2c
232
244
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
233
- ; GCN-NEXT: v_mov_b32_e32 v1, 0x3f803f80
245
+ ; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
246
+ ; GCN-NEXT: v_mov_b32_e32 v0, 0x3f803f80
247
+ ; GCN-NEXT: s_mov_b64 exec, s[2:3]
234
248
; GCN-NEXT: s_mov_b32 s2, -1
235
249
; GCN-NEXT: s_waitcnt lgkmcnt(0)
236
- ; GCN-NEXT: v_mov_b32_e32 v0, s3
250
+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
237
251
; GCN-NEXT: s_not_b64 exec, exec
238
- ; GCN-NEXT: v_mov_b32_e32 v0, v1
252
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
239
253
; GCN-NEXT: s_not_b64 exec, exec
240
254
; GCN-NEXT: s_mov_b32 s3, 0xf000
241
255
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -249,16 +263,18 @@ define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %
249
263
; GCN-LABEL: set_inactive_v4i16:
250
264
; GCN: ; %bb.0:
251
265
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
252
- ; GCN-NEXT: s_mov_b32 s4, 0x10001
253
- ; GCN-NEXT: s_mov_b32 s5, s4
254
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
255
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
266
+ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
267
+ ; GCN-NEXT: s_mov_b32 s6, 0x10001
268
+ ; GCN-NEXT: s_mov_b32 s7, s6
269
+ ; GCN-NEXT: v_mov_b32_e32 v0, s6
270
+ ; GCN-NEXT: v_mov_b32_e32 v1, s7
271
+ ; GCN-NEXT: s_mov_b64 exec, s[4:5]
256
272
; GCN-NEXT: s_waitcnt lgkmcnt(0)
257
273
; GCN-NEXT: v_mov_b32_e32 v0, s2
258
274
; GCN-NEXT: v_mov_b32_e32 v1, s3
259
275
; GCN-NEXT: s_not_b64 exec, exec
260
- ; GCN-NEXT: v_mov_b32_e32 v0, v2
261
- ; GCN-NEXT: v_mov_b32_e32 v1, v3
276
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
277
+ ; GCN-NEXT: v_mov_b32_e32 v1, v1
262
278
; GCN-NEXT: s_not_b64 exec, exec
263
279
; GCN-NEXT: s_mov_b32 s2, -1
264
280
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -273,16 +289,18 @@ define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half>
273
289
; GCN-LABEL: set_inactive_v4f16:
274
290
; GCN: ; %bb.0:
275
291
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
276
- ; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
277
- ; GCN-NEXT: s_mov_b32 s5, s4
278
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
279
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
292
+ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
293
+ ; GCN-NEXT: s_mov_b32 s6, 0x3c003c00
294
+ ; GCN-NEXT: s_mov_b32 s7, s6
295
+ ; GCN-NEXT: v_mov_b32_e32 v0, s6
296
+ ; GCN-NEXT: v_mov_b32_e32 v1, s7
297
+ ; GCN-NEXT: s_mov_b64 exec, s[4:5]
280
298
; GCN-NEXT: s_waitcnt lgkmcnt(0)
281
299
; GCN-NEXT: v_mov_b32_e32 v0, s2
282
300
; GCN-NEXT: v_mov_b32_e32 v1, s3
283
301
; GCN-NEXT: s_not_b64 exec, exec
284
- ; GCN-NEXT: v_mov_b32_e32 v0, v2
285
- ; GCN-NEXT: v_mov_b32_e32 v1, v3
302
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
303
+ ; GCN-NEXT: v_mov_b32_e32 v1, v1
286
304
; GCN-NEXT: s_not_b64 exec, exec
287
305
; GCN-NEXT: s_mov_b32 s2, -1
288
306
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -297,16 +315,18 @@ define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloa
297
315
; GCN-LABEL: set_inactive_v4bf16:
298
316
; GCN: ; %bb.0:
299
317
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
300
- ; GCN-NEXT: s_mov_b32 s4, 0x3f803f80
301
- ; GCN-NEXT: s_mov_b32 s5, s4
302
- ; GCN-NEXT: v_mov_b32_e32 v2, s4
303
- ; GCN-NEXT: v_mov_b32_e32 v3, s5
318
+ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
319
+ ; GCN-NEXT: s_mov_b32 s6, 0x3f803f80
320
+ ; GCN-NEXT: s_mov_b32 s7, s6
321
+ ; GCN-NEXT: v_mov_b32_e32 v0, s6
322
+ ; GCN-NEXT: v_mov_b32_e32 v1, s7
323
+ ; GCN-NEXT: s_mov_b64 exec, s[4:5]
304
324
; GCN-NEXT: s_waitcnt lgkmcnt(0)
305
325
; GCN-NEXT: v_mov_b32_e32 v0, s2
306
326
; GCN-NEXT: v_mov_b32_e32 v1, s3
307
327
; GCN-NEXT: s_not_b64 exec, exec
308
- ; GCN-NEXT: v_mov_b32_e32 v0, v2
309
- ; GCN-NEXT: v_mov_b32_e32 v1, v3
328
+ ; GCN-NEXT: v_mov_b32_e32 v0, v0
329
+ ; GCN-NEXT: v_mov_b32_e32 v1, v1
310
330
; GCN-NEXT: s_not_b64 exec, exec
311
331
; GCN-NEXT: s_mov_b32 s2, -1
312
332
; GCN-NEXT: s_mov_b32 s3, 0xf000
0 commit comments