@@ -8808,90 +8808,90 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
8808
8808
; GFX12-NEXT: v_lshrrev_b16 v2, 8, s6
8809
8809
; GFX12-NEXT: v_lshrrev_b16 v4, 8, s5
8810
8810
; GFX12-NEXT: v_lshrrev_b16 v8, 8, s2
8811
- ; GFX12-NEXT: s_lshr_b32 s24 , s7, 16
8811
+ ; GFX12-NEXT: s_lshr_b32 s22 , s7, 16
8812
8812
; GFX12-NEXT: v_bfe_i32 v31, v1, 0, 8
8813
- ; GFX12-NEXT: s_lshr_b32 s42 , s2, 24
8814
- ; GFX12-NEXT: s_mov_b32 s48 , s7
8813
+ ; GFX12-NEXT: s_lshr_b32 s40 , s2, 24
8814
+ ; GFX12-NEXT: s_mov_b32 s46 , s7
8815
8815
; GFX12-NEXT: v_lshrrev_b16 v5, 8, s4
8816
8816
; GFX12-NEXT: v_lshrrev_b16 v7, 8, s1
8817
- ; GFX12-NEXT: s_lshr_b32 s26 , s6, 16
8818
- ; GFX12-NEXT: s_lshr_b32 s44 , s1, 16
8817
+ ; GFX12-NEXT: s_lshr_b32 s24 , s6, 16
8818
+ ; GFX12-NEXT: s_lshr_b32 s42 , s1, 16
8819
8819
; GFX12-NEXT: s_ashr_i64 s[58:59], s[6:7], 56
8820
- ; GFX12-NEXT: s_bfe_i64 s[48:49 ], s[48:49 ], 0x80000
8821
- ; GFX12-NEXT: s_bfe_i64 s[42:43 ], s[42:43 ], 0x80000
8822
- ; GFX12-NEXT: s_bfe_i64 s[24:25 ], s[24:25 ], 0x80000
8820
+ ; GFX12-NEXT: s_bfe_i64 s[46:47 ], s[46:47 ], 0x80000
8821
+ ; GFX12-NEXT: s_bfe_i64 s[40:41 ], s[40:41 ], 0x80000
8822
+ ; GFX12-NEXT: s_bfe_i64 s[22:23 ], s[22:23 ], 0x80000
8823
8823
; GFX12-NEXT: v_lshrrev_b16 v6, 8, s3
8824
8824
; GFX12-NEXT: v_lshrrev_b16 v3, 8, s0
8825
- ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s24
8826
- ; GFX12-NEXT: s_lshr_b32 s28 , s6, 24
8827
- ; GFX12-NEXT: s_lshr_b32 s30 , s5, 16
8828
- ; GFX12-NEXT: s_lshr_b32 s40 , s2, 16
8825
+ ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s22
8826
+ ; GFX12-NEXT: s_lshr_b32 s26 , s6, 24
8827
+ ; GFX12-NEXT: s_lshr_b32 s28 , s5, 16
8828
+ ; GFX12-NEXT: s_lshr_b32 s38 , s2, 16
8829
8829
; GFX12-NEXT: v_bfe_i32 v11, v8, 0, 8
8830
8830
; GFX12-NEXT: v_bfe_i32 v23, v4, 0, 8
8831
8831
; GFX12-NEXT: v_bfe_i32 v27, v2, 0, 8
8832
8832
; GFX12-NEXT: v_ashrrev_i32_e32 v32, 31, v31
8833
- ; GFX12-NEXT: s_bfe_i64 s[44:45 ], s[44:45 ], 0x80000
8834
- ; GFX12-NEXT: s_bfe_i64 s[26:27 ], s[26:27 ], 0x80000
8835
- ; GFX12-NEXT: v_dual_mov_b32 v34, s25 :: v_dual_mov_b32 v35, s58
8836
- ; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s26
8837
- ; GFX12-NEXT: v_dual_mov_b32 v56, s43 :: v_dual_mov_b32 v29, s48
8838
- ; GFX12-NEXT: v_mov_b32_e32 v30, s49
8839
- ; GFX12-NEXT: s_lshr_b32 s46 , s0, 24
8840
- ; GFX12-NEXT: s_mov_b32 s50 , s5
8841
- ; GFX12-NEXT: s_mov_b32 s52 , s3
8842
- ; GFX12-NEXT: s_lshr_b32 s34 , s4, 16
8843
- ; GFX12-NEXT: s_lshr_b32 s36 , s4, 24
8844
- ; GFX12-NEXT: s_ashr_i64 s[22:23 ], s[2:3], 56
8833
+ ; GFX12-NEXT: s_bfe_i64 s[42:43 ], s[42:43 ], 0x80000
8834
+ ; GFX12-NEXT: s_bfe_i64 s[24:25 ], s[24:25 ], 0x80000
8835
+ ; GFX12-NEXT: v_dual_mov_b32 v34, s23 :: v_dual_mov_b32 v35, s58
8836
+ ; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s24
8837
+ ; GFX12-NEXT: v_dual_mov_b32 v56, s41 :: v_dual_mov_b32 v29, s46
8838
+ ; GFX12-NEXT: v_mov_b32_e32 v30, s47
8839
+ ; GFX12-NEXT: s_lshr_b32 s44 , s0, 24
8840
+ ; GFX12-NEXT: s_mov_b32 s48 , s5
8841
+ ; GFX12-NEXT: s_mov_b32 s50 , s3
8842
+ ; GFX12-NEXT: s_lshr_b32 s30 , s4, 16
8843
+ ; GFX12-NEXT: s_lshr_b32 s34 , s4, 24
8844
+ ; GFX12-NEXT: s_ashr_i64 s[54:55 ], s[2:3], 56
8845
8845
; GFX12-NEXT: s_ashr_i64 s[56:57], s[4:5], 56
8846
8846
; GFX12-NEXT: v_bfe_i32 v7, v7, 0, 8
8847
8847
; GFX12-NEXT: v_bfe_i32 v19, v5, 0, 8
8848
- ; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000
8849
- ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
8848
+ ; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000
8850
8849
; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000
8851
- ; GFX12-NEXT: s_lshr_b32 s38, s3, 16
8852
- ; GFX12-NEXT: s_mov_b32 s54, s1
8850
+ ; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000
8851
+ ; GFX12-NEXT: s_lshr_b32 s36, s3, 16
8852
+ ; GFX12-NEXT: s_mov_b32 s52, s1
8853
8853
; GFX12-NEXT: s_bfe_i64 s[12:13], s[2:3], 0x80000
8854
8854
; GFX12-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x80000
8855
8855
; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000
8856
- ; GFX12-NEXT: s_bfe_i64 s[2:3], s[52:53 ], 0x80000
8857
- ; GFX12-NEXT: s_bfe_i64 s[4:5], s[50:51 ], 0x80000
8858
- ; GFX12-NEXT: s_bfe_i64 s[6:7], s[46:47 ], 0x80000
8856
+ ; GFX12-NEXT: s_bfe_i64 s[2:3], s[50:51 ], 0x80000
8857
+ ; GFX12-NEXT: s_bfe_i64 s[4:5], s[48:49 ], 0x80000
8858
+ ; GFX12-NEXT: s_bfe_i64 s[6:7], s[44:45 ], 0x80000
8859
8859
; GFX12-NEXT: s_lshr_b32 s20, s0, 16
8860
8860
; GFX12-NEXT: s_ashr_i64 s[18:19], s[0:1], 56
8861
8861
; GFX12-NEXT: v_bfe_i32 v3, v3, 0, 8
8862
8862
; GFX12-NEXT: v_bfe_i32 v15, v6, 0, 8
8863
- ; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000
8864
8863
; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000
8865
- ; GFX12-NEXT: v_dual_mov_b32 v38, s27 :: v_dual_mov_b32 v39, s28
8866
- ; GFX12-NEXT: v_dual_mov_b32 v40, s29 :: v_dual_mov_b32 v41, s30
8867
- ; GFX12-NEXT: v_dual_mov_b32 v42, s31 :: v_dual_mov_b32 v43, s56
8868
- ; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s34
8869
- ; GFX12-NEXT: v_dual_mov_b32 v52, s23 :: v_dual_mov_b32 v53, s40
8870
- ; GFX12-NEXT: v_dual_mov_b32 v54, s41 :: v_dual_mov_b32 v55, s42
8864
+ ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000
8865
+ ; GFX12-NEXT: v_dual_mov_b32 v38, s25 :: v_dual_mov_b32 v39, s26
8866
+ ; GFX12-NEXT: v_dual_mov_b32 v40, s27 :: v_dual_mov_b32 v41, s28
8867
+ ; GFX12-NEXT: v_dual_mov_b32 v42, s29 :: v_dual_mov_b32 v43, s56
8868
+ ; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s30
8869
+ ; GFX12-NEXT: v_dual_mov_b32 v52, s55 :: v_dual_mov_b32 v53, s38
8870
+ ; GFX12-NEXT: v_dual_mov_b32 v54, s39 :: v_dual_mov_b32 v55, s40
8871
8871
; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000
8872
- ; GFX12-NEXT: s_bfe_i64 s[0:1], s[54:55 ], 0x80000
8872
+ ; GFX12-NEXT: s_bfe_i64 s[0:1], s[52:53 ], 0x80000
8873
8873
; GFX12-NEXT: v_ashrrev_i32_e32 v12, 31, v11
8874
8874
; GFX12-NEXT: v_ashrrev_i32_e32 v24, 31, v23
8875
8875
; GFX12-NEXT: v_ashrrev_i32_e32 v28, 31, v27
8876
8876
; GFX12-NEXT: global_store_b128 v0, v[33:36], s[8:9] offset:240
8877
- ; GFX12-NEXT: v_mov_b32_e32 v33, s44
8877
+ ; GFX12-NEXT: v_mov_b32_e32 v33, s42
8878
8878
; GFX12-NEXT: global_store_b128 v0, v[29:32], s[8:9] offset:224
8879
8879
; GFX12-NEXT: v_dual_mov_b32 v25, s16 :: v_dual_mov_b32 v26, s17
8880
8880
; GFX12-NEXT: v_dual_mov_b32 v32, s7 :: v_dual_mov_b32 v21, s4
8881
8881
; GFX12-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v17, s14
8882
8882
; GFX12-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v9, s12
8883
8883
; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v5, s0
8884
8884
; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000
8885
- ; GFX12-NEXT: s_bfe_i64 s[38:39 ], s[38:39 ], 0x80000
8886
- ; GFX12-NEXT: v_dual_mov_b32 v46, s35 :: v_dual_mov_b32 v47, s36
8887
- ; GFX12-NEXT: v_dual_mov_b32 v48, s37 :: v_dual_mov_b32 v49, s38
8888
- ; GFX12-NEXT: v_dual_mov_b32 v34, s45 :: v_dual_mov_b32 v35, s18
8885
+ ; GFX12-NEXT: s_bfe_i64 s[36:37 ], s[36:37 ], 0x80000
8886
+ ; GFX12-NEXT: v_dual_mov_b32 v46, s31 :: v_dual_mov_b32 v47, s34
8887
+ ; GFX12-NEXT: v_dual_mov_b32 v48, s35 :: v_dual_mov_b32 v49, s36
8888
+ ; GFX12-NEXT: v_dual_mov_b32 v34, s43 :: v_dual_mov_b32 v35, s18
8889
8889
; GFX12-NEXT: v_dual_mov_b32 v36, s19 :: v_dual_mov_b32 v29, s20
8890
8890
; GFX12-NEXT: v_ashrrev_i32_e32 v8, 31, v7
8891
8891
; GFX12-NEXT: v_ashrrev_i32_e32 v20, 31, v19
8892
8892
; GFX12-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v13, s2
8893
8893
; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v1, s10
8894
- ; GFX12-NEXT: v_dual_mov_b32 v50, s39 :: v_dual_mov_b32 v51, s22
8894
+ ; GFX12-NEXT: v_dual_mov_b32 v50, s37 :: v_dual_mov_b32 v51, s54
8895
8895
; GFX12-NEXT: v_dual_mov_b32 v30, s21 :: v_dual_mov_b32 v31, s6
8896
8896
; GFX12-NEXT: v_ashrrev_i32_e32 v4, 31, v3
8897
8897
; GFX12-NEXT: v_ashrrev_i32_e32 v16, 31, v15
0 commit comments