@@ -7475,12 +7475,17 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
7475
7475
; GFX12-NEXT: s_clause 0x1
7476
7476
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112
7477
7477
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:96
7478
- ; GFX12-NEXT: v_dual_mov_b32 v0, s22 :: v_dual_mov_b32 v3, s21
7479
- ; GFX12-NEXT: v_dual_mov_b32 v1, s23 :: v_dual_mov_b32 v2, s20
7480
- ; GFX12-NEXT: v_dual_mov_b32 v9, s25 :: v_dual_mov_b32 v8, s24
7481
- ; GFX12-NEXT: v_dual_mov_b32 v11, s27 :: v_dual_mov_b32 v10, s26
7482
- ; GFX12-NEXT: v_dual_mov_b32 v21, s31 :: v_dual_mov_b32 v20, s30
7483
- ; GFX12-NEXT: v_dual_mov_b32 v23, s29 :: v_dual_mov_b32 v22, s28
7478
+ ; GFX12-NEXT: v_dual_mov_b32 v0, s20 :: v_dual_mov_b32 v3, s29
7479
+ ; GFX12-NEXT: v_dual_mov_b32 v1, s21 :: v_dual_mov_b32 v2, s31
7480
+ ; GFX12-NEXT: v_mov_b32_e32 v9, s25
7481
+ ; GFX12-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x80000
7482
+ ; GFX12-NEXT: s_bfe_i64 s[4:5], s[30:31], 0x80000
7483
+ ; GFX12-NEXT: v_dual_mov_b32 v8, s24 :: v_dual_mov_b32 v11, s23
7484
+ ; GFX12-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v17, s27
7485
+ ; GFX12-NEXT: v_dual_mov_b32 v16, s26 :: v_dual_mov_b32 v19, s7
7486
+ ; GFX12-NEXT: v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v21, s3
7487
+ ; GFX12-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v23, s5
7488
+ ; GFX12-NEXT: v_mov_b32_e32 v22, s4
7484
7489
; GFX12-NEXT: s_clause 0x5
7485
7490
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:80
7486
7491
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
@@ -9033,19 +9038,9 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
9033
9038
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:208
9034
9039
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:192
9035
9040
; GFX12-NEXT: s_wait_alu 0xfffe
9036
- ; GFX12-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s71
9037
- ; GFX12-NEXT: v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v2, s70
9038
- ; GFX12-NEXT: v_mov_b32_e32 v5, s53
9039
- ; GFX12-NEXT: s_lshr_b32 s34, s3, 8
9040
- ; GFX12-NEXT: s_mov_b32 s30, s3
9041
- ; GFX12-NEXT: s_lshr_b32 s24, s2, 16
9042
- ; GFX12-NEXT: s_lshr_b32 s22, s2, 24
9043
- ; GFX12-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x80000
9044
- ; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x80000
9045
- ; GFX12-NEXT: v_dual_mov_b32 v4, s52 :: v_dual_mov_b32 v7, s51
9046
- ; GFX12-NEXT: v_dual_mov_b32 v6, s50 :: v_dual_mov_b32 v9, s55
9047
- ; GFX12-NEXT: s_lshr_b32 s20, s2, 8
9048
- ; GFX12-NEXT: s_ashr_i64 s[26:27], s[2:3], 56
9041
+ ; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s62
9042
+ ; GFX12-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s63
9043
+ ; GFX12-NEXT: v_mov_b32_e32 v5, s55
9049
9044
; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x80000
9050
9045
; GFX12-NEXT: v_dual_mov_b32 v4, s54 :: v_dual_mov_b32 v7, s53
9051
9046
; GFX12-NEXT: v_dual_mov_b32 v6, s52 :: v_dual_mov_b32 v9, s57
@@ -9077,11 +9072,15 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
9077
9072
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:128
9078
9073
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[8:9] offset:112
9079
9074
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[8:9] offset:96
9080
- ; GFX12-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v3, s23
9081
- ; GFX12-NEXT: v_dual_mov_b32 v1, s25 :: v_dual_mov_b32 v2, s22
9082
- ; GFX12-NEXT: v_mov_b32_e32 v5, s17
9083
- ; GFX12-NEXT: s_lshr_b32 s68, s0, 8
9084
- ; GFX12-NEXT: s_bfe_i64 s[6:7], s[62:63], 0x80000
9075
+ ; GFX12-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v3, s27
9076
+ ; GFX12-NEXT: v_dual_mov_b32 v1, s29 :: v_dual_mov_b32 v2, s26
9077
+ ; GFX12-NEXT: v_mov_b32_e32 v5, s21
9078
+ ; GFX12-NEXT: s_lshr_b32 s64, s0, 8
9079
+ ; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x80000
9080
+ ; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x80000
9081
+ ; GFX12-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v7, s25
9082
+ ; GFX12-NEXT: v_dual_mov_b32 v6, s24 :: v_dual_mov_b32 v9, s23
9083
+ ; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x80000
9085
9084
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x80000
9086
9085
; GFX12-NEXT: s_wait_alu 0xfffe
9087
9086
; GFX12-NEXT: v_dual_mov_b32 v8, s22 :: v_dual_mov_b32 v11, s6
0 commit comments