@@ -3181,7 +3181,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
3181
3181
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3182
3182
; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00
3183
3183
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
3184
- ; GFX11-NEXT: scratch_store_b32 off, v60 , s33 offset:1600 ; 4-byte Folded Spill
3184
+ ; GFX11-NEXT: scratch_store_b32 off, v63 , s33 offset:1584 ; 4-byte Folded Spill
3185
3185
; GFX11-NEXT: s_mov_b32 exec_lo, s0
3186
3186
; GFX11-NEXT: s_mov_b32 s0, 0
3187
3187
; GFX11-NEXT: v_mov_b32_e32 v4, 0
@@ -3191,19 +3191,22 @@ define amdgpu_gfx void @call_72xi32() #1 {
3191
3191
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3192
3192
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
3193
3193
; GFX11-NEXT: s_addk_i32 s32, 0xa00
3194
- ; GFX11-NEXT: s_clause 0xb
3195
- ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:44
3196
- ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:40
3197
- ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:36
3198
- ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:32
3199
- ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:28
3200
- ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:24
3201
- ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:20
3202
- ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:16
3203
- ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:12
3204
- ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:8
3205
- ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:4
3206
- ; GFX11-NEXT: scratch_store_b32 off, v59, s33
3194
+ ; GFX11-NEXT: s_clause 0xe
3195
+ ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:56
3196
+ ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:52
3197
+ ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:48
3198
+ ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:44
3199
+ ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:40
3200
+ ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:36
3201
+ ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:32
3202
+ ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:28
3203
+ ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:24
3204
+ ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:20
3205
+ ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:16
3206
+ ; GFX11-NEXT: scratch_store_b32 off, v59, s33 offset:12
3207
+ ; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:8
3208
+ ; GFX11-NEXT: scratch_store_b32 off, v61, s33 offset:4
3209
+ ; GFX11-NEXT: scratch_store_b32 off, v62, s33
3207
3210
; GFX11-NEXT: s_add_i32 s0, s32, 0xa0
3208
3211
; GFX11-NEXT: s_add_i32 s1, s32, 0x90
3209
3212
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
@@ -3224,7 +3227,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
3224
3227
; GFX11-NEXT: s_add_i32 s0, s32, 32
3225
3228
; GFX11-NEXT: s_add_i32 s1, s32, 16
3226
3229
; GFX11-NEXT: s_add_i32 s2, s33, 0x200
3227
- ; GFX11-NEXT: v_writelane_b32 v60 , s30, 0
3230
+ ; GFX11-NEXT: v_writelane_b32 v63 , s30, 0
3228
3231
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0
3229
3232
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
3230
3233
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
@@ -3245,7 +3248,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
3245
3248
; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
3246
3249
; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
3247
3250
; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
3248
- ; GFX11-NEXT: v_writelane_b32 v60 , s31, 1
3251
+ ; GFX11-NEXT: v_writelane_b32 v63 , s31, 1
3249
3252
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3250
3253
; GFX11-NEXT: s_clause 0x1
3251
3254
; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624
@@ -3267,7 +3270,8 @@ define amdgpu_gfx void @call_72xi32() #1 {
3267
3270
; GFX11-NEXT: s_waitcnt vmcnt(2)
3268
3271
; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v1, v4
3269
3272
; GFX11-NEXT: s_waitcnt vmcnt(0)
3270
- ; GFX11-NEXT: scratch_store_b128 off, v[16:19], s33 offset:1584 ; 16-byte Folded Spill
3273
+ ; GFX11-NEXT: v_dual_mov_b32 v62, v19 :: v_dual_mov_b32 v61, v18
3274
+ ; GFX11-NEXT: v_mov_b32_e32 v60, v17
3271
3275
; GFX11-NEXT: s_clause 0x3
3272
3276
; GFX11-NEXT: scratch_load_b128 v[16:19], off, s33 offset:528
3273
3277
; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:544
@@ -3285,17 +3289,18 @@ define amdgpu_gfx void @call_72xi32() #1 {
3285
3289
; GFX11-NEXT: s_waitcnt vmcnt(0)
3286
3290
; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1536 ; 16-byte Folded Spill
3287
3291
; GFX11-NEXT: scratch_store_b128 off, v[32:35], s32
3288
- ; GFX11-NEXT: v_dual_mov_b32 v31, v47 :: v_dual_mov_b32 v32, v36
3292
+ ; GFX11-NEXT: v_mov_b32_e32 v32, v36
3289
3293
; GFX11-NEXT: v_dual_mov_b32 v33, v48 :: v_dual_mov_b32 v34, v49
3294
+ ; GFX11-NEXT: v_mov_b32_e32 v49, v52
3290
3295
; GFX11-NEXT: v_dual_mov_b32 v35, v50 :: v_dual_mov_b32 v48, v51
3291
- ; GFX11-NEXT: v_dual_mov_b32 v49, v52 :: v_dual_mov_b32 v50, v53
3292
- ; GFX11-NEXT: v_dual_mov_b32 v51, v54 :: v_dual_mov_b32 v36, v55
3293
- ; GFX11-NEXT: v_dual_mov_b32 v53, v41 :: v_dual_mov_b32 v52, v40
3294
- ; GFX11-NEXT: v_dual_mov_b32 v54, v42 :: v_dual_mov_b32 v41, v56
3295
- ; GFX11-NEXT: v_dual_mov_b32 v55, v43 :: v_dual_mov_b32 v40, v44
3296
- ; GFX11-NEXT: v_dual_mov_b32 v42, v57 :: v_dual_mov_b32 v57, v12
3296
+ ; GFX11-NEXT: v_dual_mov_b32 v50, v53 :: v_dual_mov_b32 v51, v54
3297
+ ; GFX11-NEXT: v_mov_b32_e32 v36, v55
3298
+ ; GFX11-NEXT: v_dual_mov_b32 v52, v40 :: v_dual_mov_b32 v53, v41
3299
+ ; GFX11-NEXT: v_dual_mov_b32 v54, v42 :: v_dual_mov_b32 v55, v43
3300
+ ; GFX11-NEXT: v_mov_b32_e32 v40, v44
3301
+ ; GFX11-NEXT: v_dual_mov_b32 v41, v56 :: v_dual_mov_b32 v42, v57
3297
3302
; GFX11-NEXT: v_dual_mov_b32 v43, v58 :: v_dual_mov_b32 v56, v59
3298
- ; GFX11-NEXT: v_mov_b32_e32 v58, v13
3303
+ ; GFX11-NEXT: v_dual_mov_b32 v57, v12 :: v_dual_mov_b32 v58, v13
3299
3304
; GFX11-NEXT: v_dual_mov_b32 v12, v15 :: v_dual_mov_b32 v13, v0
3300
3305
; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v0, v3
3301
3306
; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6
@@ -3310,57 +3315,58 @@ define amdgpu_gfx void @call_72xi32() #1 {
3310
3315
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2
3311
3316
; GFX11-NEXT: v_mov_b32_e32 v0, 24
3312
3317
; GFX11-NEXT: s_add_i32 s2, s32, 0x70
3313
- ; GFX11-NEXT: v_mov_b32_e32 v6, v17
3318
+ ; GFX11-NEXT: v_mov_b32_e32 v2, v60
3314
3319
; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2
3315
- ; GFX11-NEXT: v_mov_b32_e32 v13, v24
3320
+ ; GFX11-NEXT: v_mov_b32_e32 v15, v26
3316
3321
; GFX11-NEXT: s_add_i32 s2, s32, 0x6c
3317
- ; GFX11-NEXT: v_mov_b32_e32 v7, v18
3322
+ ; GFX11-NEXT: v_dual_mov_b32 v4, v62 :: v_dual_mov_b32 v13, v24
3318
3323
; GFX11-NEXT: scratch_store_b32 off, v0, s2
3319
3324
; GFX11-NEXT: s_add_i32 s2, s32, 0x60
3320
- ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26
3325
+ ; GFX11-NEXT: v_dual_mov_b32 v6, v17 :: v_dual_mov_b32 v31, v47
3321
3326
; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2
3322
3327
; GFX11-NEXT: s_add_i32 s2, s32, 0x50
3323
- ; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45
3328
+ ; GFX11-NEXT: v_mov_b32_e32 v7, v18
3324
3329
; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2
3325
3330
; GFX11-NEXT: s_add_i32 s2, s32, 64
3326
- ; GFX11-NEXT: v_mov_b32_e32 v14, v25
3331
+ ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v29, v45
3327
3332
; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2
3328
3333
; GFX11-NEXT: s_add_i32 s2, s32, 48
3329
- ; GFX11-NEXT: v_mov_b32_e32 v16, v27
3334
+ ; GFX11-NEXT: v_mov_b32_e32 v12, v23
3330
3335
; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2
3331
3336
; GFX11-NEXT: s_add_i32 s2, s32, 32
3332
- ; GFX11-NEXT: v_mov_b32_e32 v30, v46
3337
+ ; GFX11-NEXT: v_mov_b32_e32 v14, v25
3333
3338
; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2
3334
3339
; GFX11-NEXT: s_add_i32 s2, s32, 16
3340
+ ; GFX11-NEXT: v_mov_b32_e32 v16, v27
3335
3341
; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2
3336
- ; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1584 ; 16-byte Folded Reload
3337
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
3338
- ; GFX11-NEXT: v_mov_b32_e32 v1, 42
3339
3342
; GFX11-NEXT: s_clause 0x2
3340
3343
; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1568
3341
3344
; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1552
3342
3345
; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1536
3343
3346
; GFX11-NEXT: s_add_i32 s2, s33, 0x400
3344
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3345
- ; GFX11-NEXT: v_mov_b32_e32 v0, s2
3347
+ ; GFX11-NEXT: v_dual_mov_b32 v3, v61 :: v_dual_mov_b32 v30, v46
3348
+ ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 42
3346
3349
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
3347
- ; GFX11-NEXT: s_clause 0xb
3348
- ; GFX11-NEXT: scratch_load_b32 v59, off, s33
3349
- ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4
3350
- ; GFX11-NEXT: scratch_load_b32 v57, off, s33 offset:8
3351
- ; GFX11-NEXT: scratch_load_b32 v56, off, s33 offset:12
3352
- ; GFX11-NEXT: scratch_load_b32 v47, off, s33 offset:16
3353
- ; GFX11-NEXT: scratch_load_b32 v46, off, s33 offset:20
3354
- ; GFX11-NEXT: scratch_load_b32 v45, off, s33 offset:24
3355
- ; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:28
3356
- ; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:32
3357
- ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:36
3358
- ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:40
3359
- ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:44
3360
- ; GFX11-NEXT: v_readlane_b32 s31, v60, 1
3361
- ; GFX11-NEXT: v_readlane_b32 s30, v60, 0
3350
+ ; GFX11-NEXT: s_clause 0xe
3351
+ ; GFX11-NEXT: scratch_load_b32 v62, off, s33
3352
+ ; GFX11-NEXT: scratch_load_b32 v61, off, s33 offset:4
3353
+ ; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:8
3354
+ ; GFX11-NEXT: scratch_load_b32 v59, off, s33 offset:12
3355
+ ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:16
3356
+ ; GFX11-NEXT: scratch_load_b32 v57, off, s33 offset:20
3357
+ ; GFX11-NEXT: scratch_load_b32 v56, off, s33 offset:24
3358
+ ; GFX11-NEXT: scratch_load_b32 v47, off, s33 offset:28
3359
+ ; GFX11-NEXT: scratch_load_b32 v46, off, s33 offset:32
3360
+ ; GFX11-NEXT: scratch_load_b32 v45, off, s33 offset:36
3361
+ ; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:40
3362
+ ; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:44
3363
+ ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:48
3364
+ ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:52
3365
+ ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:56
3366
+ ; GFX11-NEXT: v_readlane_b32 s31, v63, 1
3367
+ ; GFX11-NEXT: v_readlane_b32 s30, v63, 0
3362
3368
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
3363
- ; GFX11-NEXT: scratch_load_b32 v60 , off, s33 offset:1600 ; 4-byte Folded Reload
3369
+ ; GFX11-NEXT: scratch_load_b32 v63 , off, s33 offset:1584 ; 4-byte Folded Reload
3364
3370
; GFX11-NEXT: s_mov_b32 exec_lo, s0
3365
3371
; GFX11-NEXT: s_addk_i32 s32, 0xf600
3366
3372
; GFX11-NEXT: s_mov_b32 s33, s34
0 commit comments