@@ -48,7 +48,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
48
48
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
49
49
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
50
50
; GCN-NEXT: s_nop 1
51
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
51
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
52
52
; GCN-NEXT: s_nop 7
53
53
; GCN-NEXT: s_nop 3
54
54
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -72,7 +72,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
72
72
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
73
73
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
74
74
; GCN-NEXT: s_nop 1
75
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
75
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
76
76
; GCN-NEXT: s_nop 7
77
77
; GCN-NEXT: s_nop 3
78
78
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -96,7 +96,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
96
96
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
97
97
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
98
98
; GCN-NEXT: s_nop 1
99
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
99
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
100
100
; GCN-NEXT: s_nop 7
101
101
; GCN-NEXT: s_nop 3
102
102
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -120,7 +120,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
120
120
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
121
121
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
122
122
; GCN-NEXT: s_nop 1
123
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
123
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
124
124
; GCN-NEXT: s_nop 7
125
125
; GCN-NEXT: s_nop 3
126
126
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -144,7 +144,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
144
144
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
145
145
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
146
146
; GCN-NEXT: s_nop 1
147
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
147
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
148
148
; GCN-NEXT: s_nop 7
149
149
; GCN-NEXT: s_nop 3
150
150
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -168,7 +168,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
168
168
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
169
169
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
170
170
; GCN-NEXT: s_nop 1
171
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
171
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
172
172
; GCN-NEXT: s_nop 7
173
173
; GCN-NEXT: s_nop 3
174
174
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -192,7 +192,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
192
192
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
193
193
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
194
194
; GCN-NEXT: s_nop 1
195
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
195
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
196
196
; GCN-NEXT: s_nop 7
197
197
; GCN-NEXT: s_nop 3
198
198
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1803,7 +1803,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
1803
1803
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
1804
1804
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
1805
1805
; GCN-NEXT: s_nop 1
1806
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
1806
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
1807
1807
; GCN-NEXT: s_nop 7
1808
1808
; GCN-NEXT: s_nop 3
1809
1809
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1825,7 +1825,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1825
1825
; SDAG-NEXT: v_accvgpr_write_b32 a2, v18
1826
1826
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
1827
1827
; SDAG-NEXT: s_nop 1
1828
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
1828
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
1829
1829
; SDAG-NEXT: s_nop 7
1830
1830
; SDAG-NEXT: s_nop 3
1831
1831
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1843,7 +1843,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1843
1843
; GISEL-NEXT: v_accvgpr_write_b32 a3, v19
1844
1844
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
1845
1845
; GISEL-NEXT: s_nop 1
1846
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
1846
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
1847
1847
; GISEL-NEXT: s_nop 7
1848
1848
; GISEL-NEXT: s_nop 3
1849
1849
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1866,7 +1866,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1866
1866
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
1867
1867
; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d
1868
1868
; SDAG-NEXT: s_nop 1
1869
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
1869
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
1870
1870
; SDAG-NEXT: s_nop 7
1871
1871
; SDAG-NEXT: s_nop 3
1872
1872
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1885,7 +1885,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1885
1885
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
1886
1886
; GISEL-NEXT: v_mov_b32_e32 v17, 0x4d
1887
1887
; GISEL-NEXT: s_nop 1
1888
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
1888
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
1889
1889
; GISEL-NEXT: s_nop 7
1890
1890
; GISEL-NEXT: s_nop 3
1891
1891
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1927,7 +1927,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
1927
1927
; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
1928
1928
; SDAG-NEXT: v_mov_b32_e32 v17, s13
1929
1929
; SDAG-NEXT: s_nop 1
1930
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2
1930
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
1931
1931
; SDAG-NEXT: s_nop 7
1932
1932
; SDAG-NEXT: s_nop 3
1933
1933
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15]
@@ -1952,7 +1952,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
1952
1952
; GISEL-NEXT: v_accvgpr_write_b32 a3, s27
1953
1953
; GISEL-NEXT: v_mov_b32_e32 v16, s29
1954
1954
; GISEL-NEXT: s_nop 1
1955
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2
1955
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
1956
1956
; GISEL-NEXT: v_mov_b32_e32 v0, 0
1957
1957
; GISEL-NEXT: s_nop 7
1958
1958
; GISEL-NEXT: s_nop 2
@@ -1993,7 +1993,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
1993
1993
; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
1994
1994
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
1995
1995
; SDAG-NEXT: s_nop 1
1996
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0]
1996
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
1997
1997
; SDAG-NEXT: s_nop 7
1998
1998
; SDAG-NEXT: s_nop 3
1999
1999
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5]
@@ -2019,7 +2019,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
2019
2019
; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
2020
2020
; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
2021
2021
; GISEL-NEXT: s_nop 1
2022
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
2022
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
2023
2023
; GISEL-NEXT: v_mov_b32_e32 v0, 0
2024
2024
; GISEL-NEXT: s_nop 7
2025
2025
; GISEL-NEXT: s_nop 2
0 commit comments