@@ -80,13 +80,13 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
80
80
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
81
81
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
82
82
; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
83
- ; GFX9-NEXT: s_mov_b32 s33, s7
84
83
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
85
84
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
86
85
; GFX9-NEXT: s_and_b32 s4, s4, -16
87
86
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
88
87
; GFX9-NEXT: s_add_u32 s32, s6, s4
89
- ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
88
+ ; GFX9-NEXT: s_mov_b32 s32, s33
89
+ ; GFX9-NEXT: s_mov_b32 s33, s7
90
90
; GFX9-NEXT: s_waitcnt vmcnt(0)
91
91
; GFX9-NEXT: s_setpc_b64 s[30:31]
92
92
;
@@ -103,7 +103,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
103
103
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
104
104
; GFX10-NEXT: v_mov_b32_e32 v0, 0
105
105
; GFX10-NEXT: v_mov_b32_e32 v1, s6
106
- ; GFX10-NEXT: s_mov_b32 s33, s7
107
106
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
108
107
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
109
108
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
@@ -112,7 +111,8 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
112
111
; GFX10-NEXT: s_and_b32 s4, s4, -16
113
112
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
114
113
; GFX10-NEXT: s_add_u32 s32, s6, s4
115
- ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
114
+ ; GFX10-NEXT: s_mov_b32 s32, s33
115
+ ; GFX10-NEXT: s_mov_b32 s33, s7
116
116
; GFX10-NEXT: s_setpc_b64 s[30:31]
117
117
;
118
118
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align4:
@@ -127,7 +127,6 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
127
127
; GFX11-NEXT: v_mov_b32_e32 v0, 0
128
128
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
129
129
; GFX11-NEXT: s_mov_b32 s2, s32
130
- ; GFX11-NEXT: s_mov_b32 s33, s3
131
130
; GFX11-NEXT: scratch_store_b32 off, v0, s2
132
131
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
133
132
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -136,9 +135,10 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
136
135
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
137
136
; GFX11-NEXT: s_and_b32 s0, s0, -16
138
137
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
139
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
138
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
140
139
; GFX11-NEXT: s_add_u32 s32, s2, s0
141
- ; GFX11-NEXT: s_add_i32 s32, s32, -16
140
+ ; GFX11-NEXT: s_mov_b32 s32, s33
141
+ ; GFX11-NEXT: s_mov_b32 s33, s3
142
142
; GFX11-NEXT: s_setpc_b64 s[30:31]
143
143
%n = load i32 , ptr addrspace (4 ) @gv , align 4
144
144
%alloca = alloca i32 , i32 %n , addrspace (5 )
@@ -221,13 +221,13 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
221
221
; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
222
222
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
223
223
; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
224
- ; GFX9-NEXT: s_mov_b32 s33, s7
225
224
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
226
225
; GFX9-NEXT: s_lshl2_add_u32 s4, s4, 15
227
226
; GFX9-NEXT: s_and_b32 s4, s4, -16
228
227
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
229
228
; GFX9-NEXT: s_add_u32 s32, s6, s4
230
- ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
229
+ ; GFX9-NEXT: s_mov_b32 s32, s33
230
+ ; GFX9-NEXT: s_mov_b32 s33, s7
231
231
; GFX9-NEXT: s_waitcnt vmcnt(0)
232
232
; GFX9-NEXT: s_setpc_b64 s[30:31]
233
233
;
@@ -244,7 +244,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
244
244
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
245
245
; GFX10-NEXT: v_mov_b32_e32 v0, 0
246
246
; GFX10-NEXT: v_mov_b32_e32 v1, s6
247
- ; GFX10-NEXT: s_mov_b32 s33, s7
248
247
; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
249
248
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
250
249
; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
@@ -253,7 +252,8 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
253
252
; GFX10-NEXT: s_and_b32 s4, s4, -16
254
253
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
255
254
; GFX10-NEXT: s_add_u32 s32, s6, s4
256
- ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
255
+ ; GFX10-NEXT: s_mov_b32 s32, s33
256
+ ; GFX10-NEXT: s_mov_b32 s33, s7
257
257
; GFX10-NEXT: s_setpc_b64 s[30:31]
258
258
;
259
259
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align16:
@@ -268,7 +268,6 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
268
268
; GFX11-NEXT: v_mov_b32_e32 v0, 0
269
269
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
270
270
; GFX11-NEXT: s_mov_b32 s2, s32
271
- ; GFX11-NEXT: s_mov_b32 s33, s3
272
271
; GFX11-NEXT: scratch_store_b32 off, v0, s2
273
272
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
274
273
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
@@ -277,9 +276,10 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
277
276
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
278
277
; GFX11-NEXT: s_and_b32 s0, s0, -16
279
278
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
280
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
279
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
281
280
; GFX11-NEXT: s_add_u32 s32, s2, s0
282
- ; GFX11-NEXT: s_add_i32 s32, s32, -16
281
+ ; GFX11-NEXT: s_mov_b32 s32, s33
282
+ ; GFX11-NEXT: s_mov_b32 s33, s3
283
283
; GFX11-NEXT: s_setpc_b64 s[30:31]
284
284
%n = load i32 , ptr addrspace (4 ) @gv , align 16
285
285
%alloca = alloca i32 , i32 %n , addrspace (5 )
@@ -355,6 +355,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
355
355
; GFX9-NEXT: s_mov_b32 s6, s33
356
356
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
357
357
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
358
+ ; GFX9-NEXT: s_mov_b32 s7, s34
359
+ ; GFX9-NEXT: s_mov_b32 s34, s32
358
360
; GFX9-NEXT: s_addk_i32 s32, 0x1000
359
361
; GFX9-NEXT: s_getpc_b64 s[4:5]
360
362
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
@@ -373,7 +375,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
373
375
; GFX9-NEXT: s_and_b32 s4, s4, -16
374
376
; GFX9-NEXT: s_lshl_b32 s4, s4, 6
375
377
; GFX9-NEXT: s_add_u32 s32, s5, s4
376
- ; GFX9-NEXT: s_addk_i32 s32, 0xf000
378
+ ; GFX9-NEXT: s_mov_b32 s32, s34
379
+ ; GFX9-NEXT: s_mov_b32 s34, s7
377
380
; GFX9-NEXT: s_waitcnt vmcnt(0)
378
381
; GFX9-NEXT: s_setpc_b64 s[30:31]
379
382
;
@@ -382,8 +385,10 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
382
385
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383
386
; GFX10-NEXT: s_mov_b32 s6, s33
384
387
; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0
385
- ; GFX10-NEXT: s_addk_i32 s32, 0x800
388
+ ; GFX10-NEXT: s_mov_b32 s7, s34
386
389
; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00
390
+ ; GFX10-NEXT: s_mov_b32 s34, s32
391
+ ; GFX10-NEXT: s_addk_i32 s32, 0x800
387
392
; GFX10-NEXT: s_getpc_b64 s[4:5]
388
393
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
389
394
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
@@ -401,16 +406,19 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
401
406
; GFX10-NEXT: s_and_b32 s4, s4, -16
402
407
; GFX10-NEXT: s_lshl_b32 s4, s4, 5
403
408
; GFX10-NEXT: s_add_u32 s32, s5, s4
404
- ; GFX10-NEXT: s_addk_i32 s32, 0xf800
409
+ ; GFX10-NEXT: s_mov_b32 s32, s34
410
+ ; GFX10-NEXT: s_mov_b32 s34, s7
405
411
; GFX10-NEXT: s_setpc_b64 s[30:31]
406
412
;
407
413
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align32:
408
414
; GFX11: ; %bb.0:
409
415
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410
416
; GFX11-NEXT: s_mov_b32 s2, s33
411
417
; GFX11-NEXT: s_add_i32 s33, s32, 31
412
- ; GFX11-NEXT: s_add_i32 s32, s32, 64
418
+ ; GFX11-NEXT: s_mov_b32 s3, s34
413
419
; GFX11-NEXT: s_and_not1_b32 s33, s33, 31
420
+ ; GFX11-NEXT: s_mov_b32 s34, s32
421
+ ; GFX11-NEXT: s_add_i32 s32, s32, 64
414
422
; GFX11-NEXT: s_getpc_b64 s[0:1]
415
423
; GFX11-NEXT: s_add_u32 s0, s0, gv@gotpcrel32@lo+4
416
424
; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
@@ -429,8 +437,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
429
437
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
430
438
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
431
439
; GFX11-NEXT: s_add_u32 s32, s1, s0
432
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
433
- ; GFX11-NEXT: s_addk_i32 s32, 0xffc0
440
+ ; GFX11-NEXT: s_mov_b32 s32, s34
441
+ ; GFX11-NEXT: s_mov_b32 s34, s3
434
442
; GFX11-NEXT: s_setpc_b64 s[30:31]
435
443
%n = load i32 , ptr addrspace (4 ) @gv
436
444
%alloca = alloca i32 , i32 %n , align 32 , addrspace (5 )
0 commit comments