@@ -85,7 +85,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
85
85
; GFX9-NEXT: s_and_b32 s4, s4, -16
86
86
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
87
87
; GFX9-NEXT: s_add_u32 s32, s6, s4
88
- ; GFX9-NEXT: s_mov_b32 s32, s33
89
88
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
90
89
; GFX9-NEXT: s_mov_b32 s33, s7
91
90
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -112,7 +111,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112
111
; GFX10-NEXT: s_and_b32 s4, s4, -16
113
112
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114
113
; GFX10-NEXT: s_add_u32 s32, s6, s4
115
- ; GFX10-NEXT: s_mov_b32 s32, s33
116
114
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
117
115
; GFX10-NEXT: s_mov_b32 s33, s7
118
116
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -126,9 +124,9 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
126
124
; GFX11-NEXT: s_getpc_b64 s[0:1]
127
125
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
128
126
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
129
- ; GFX11-NEXT: s_mov_b32 s2, s32
130
- ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
131
127
; GFX11-NEXT: v_mov_b32_e32 v0, 0
128
+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129
+ ; GFX11-NEXT: s_mov_b32 s2, s32
132
130
; GFX11-NEXT: scratch_store_b32 off, v0, s2
133
131
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
134
132
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -137,9 +135,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
137
135
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
138
136
; GFX11-NEXT: s_and_b32 s0, s0, -16
139
137
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
140
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
138
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
141
139
; GFX11-NEXT: s_add_u32 s32, s2, s0
142
- ; GFX11-NEXT: s_mov_b32 s32, s33
143
140
; GFX11-NEXT: s_add_i32 s32, s32, -16
144
141
; GFX11-NEXT: s_mov_b32 s33, s3
145
142
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -229,7 +226,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
229
226
; GFX9-NEXT: s_and_b32 s4, s4, -16
230
227
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
231
228
; GFX9-NEXT: s_add_u32 s32, s6, s4
232
- ; GFX9-NEXT: s_mov_b32 s32, s33
233
229
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
234
230
; GFX9-NEXT: s_mov_b32 s33, s7
235
231
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -256,7 +252,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
256
252
; GFX10-NEXT: s_and_b32 s4, s4, -16
257
253
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
258
254
; GFX10-NEXT: s_add_u32 s32, s6, s4
259
- ; GFX10-NEXT: s_mov_b32 s32, s33
260
255
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
261
256
; GFX10-NEXT: s_mov_b32 s33, s7
262
257
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -270,9 +265,9 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
270
265
; GFX11-NEXT: s_getpc_b64 s[0:1]
271
266
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
272
267
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
273
- ; GFX11-NEXT: s_mov_b32 s2, s32
274
- ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
275
268
; GFX11-NEXT: v_mov_b32_e32 v0, 0
269
+ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270
+ ; GFX11-NEXT: s_mov_b32 s2, s32
276
271
; GFX11-NEXT: scratch_store_b32 off, v0, s2
277
272
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
278
273
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -281,9 +276,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
281
276
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
282
277
; GFX11-NEXT: s_and_b32 s0, s0, -16
283
278
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
284
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
279
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
285
280
; GFX11-NEXT: s_add_u32 s32, s2, s0
286
- ; GFX11-NEXT: s_mov_b32 s32, s33
287
281
; GFX11-NEXT: s_add_i32 s32, s32, -16
288
282
; GFX11-NEXT: s_mov_b32 s33, s3
289
283
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -361,8 +355,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
361
355
; GFX9-NEXT: s_mov_b32 s6, s33
362
356
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
363
357
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
364
- ; GFX9-NEXT: s_mov_b32 s7, s34
365
- ; GFX9-NEXT: s_mov_b32 s34, s32
366
358
; GFX9-NEXT: s_addk_i32 s32, 0x1000
367
359
; GFX9-NEXT: s_getpc_b64 s[4:5]
368
360
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -380,8 +372,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
380
372
; GFX9-NEXT: s_and_b32 s4, s4, -16
381
373
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
382
374
; GFX9-NEXT: s_add_u32 s32, s5, s4
383
- ; GFX9-NEXT: s_mov_b32 s32, s34
384
- ; GFX9-NEXT: s_mov_b32 s34, s7
385
375
; GFX9-NEXT: s_addk_i32 s32, 0xf000
386
376
; GFX9-NEXT: s_mov_b32 s33, s6
387
377
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -392,9 +382,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
392
382
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393
383
; GFX10-NEXT: s_mov_b32 s6, s33
394
384
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
395
- ; GFX10-NEXT: s_mov_b32 s7, s34
396
385
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
397
- ; GFX10-NEXT: s_mov_b32 s34, s32
398
386
; GFX10-NEXT: s_addk_i32 s32, 0x800
399
387
; GFX10-NEXT: s_getpc_b64 s[4:5]
400
388
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -412,8 +400,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
412
400
; GFX10-NEXT: s_and_b32 s4, s4, -16
413
401
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
414
402
; GFX10-NEXT: s_add_u32 s32, s5, s4
415
- ; GFX10-NEXT: s_mov_b32 s32, s34
416
- ; GFX10-NEXT: s_mov_b32 s34, s7
417
403
; GFX10-NEXT: s_addk_i32 s32, 0xf800
418
404
; GFX10-NEXT: s_mov_b32 s33, s6
419
405
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -423,9 +409,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
423
409
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424
410
; GFX11-NEXT: s_mov_b32 s2, s33
425
411
; GFX11-NEXT: s_add_i32 s33, s32, 31
426
- ; GFX11-NEXT: s_mov_b32 s3, s34
412
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
427
413
; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
428
- ; GFX11-NEXT: s_mov_b32 s34, s32
429
414
; GFX11-NEXT: s_add_i32 s32, s32, 64
430
415
; GFX11-NEXT: s_getpc_b64 s[0:1]
431
416
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
@@ -444,8 +429,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
444
429
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
445
430
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
446
431
; GFX11-NEXT: s_add_u32 s32, s1, s0
447
- ; GFX11-NEXT: s_mov_b32 s32, s34
448
- ; GFX11-NEXT: s_mov_b32 s34, s3
432
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
449
433
; GFX11-NEXT: s_addk_i32 s32, 0xffc0
450
434
; GFX11-NEXT: s_mov_b32 s33, s2
451
435
; GFX11-NEXT: s_setpc_b64 s[30:31]
0 commit comments