Skip to content

Commit a4b14ad

Browse files
committed
AMDGPU: Optimize mfma_scale intrinsics with 0 inputs
We can use the unscaled form of the instruction if we know the scale factors are both 0.
1 parent 7efa846 commit a4b14ad

File tree

4 files changed

+108
-95
lines changed

4 files changed

+108
-95
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,19 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
304304
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
305305
>;
306306

307+
308+
// Optimize v_mfma_scale* instructions to avoid the scale if the
309+
// scales are known 0.
310+
class UnscaledMFMAOptimizationPat<SDPatternOperator intrin> : PatFrag<
311+
(ops node:$srca, node:$srcb, node:$srcc,
312+
node:$cbsz, node:$blgp),
313+
(intrin $srca, $srcb, $srcc, $cbsz, $blgp,
314+
srcvalue, 0, srcvalue, 0)
315+
>;
316+
317+
def mfma_f32_16x16x128_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_16x16x128_f8f6f4>;
318+
def mfma_f32_32x32x64_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_32x32x64_f8f6f4>;
319+
307320
//===----------------------------------------------------------------------===//
308321
// ValueType helpers
309322
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,7 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
888888

889889
// Each of SrcA and SrcB can be encoded using 3 different sizes, so
890890
// define 9 permutations of register classes.
891-
multiclass MAIInst_SrcFormats_mc<string OpName, string ProfileSuffix, SDPatternOperator node = null_frag> {
891+
multiclass MAIInst_SrcFormats_mc<string OpName, string ProfileSuffix, SDPatternOperator node> {
892892
defvar HasAbid = false;
893893
defm _f8_f8 : MAIInst<OpName, "F32_V8I32_V8I32"#ProfileSuffix, node, HasAbid>;
894894
defm _f8_f6 : MAIInst<OpName, "F32_V8I32_V6I32"#ProfileSuffix, node, HasAbid>;
@@ -946,9 +946,9 @@ defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16
946946
defm V_MFMA_F32_32X32X16_BF16 : MAIInst<"v_mfma_f32_32x32x16bf16", "F32_V8BF16_X16", int_amdgcn_mfma_f32_32x32x16_bf16>;
947947

948948
defm V_MFMA_F32_16X16X128_F8F6F4 : MAIInst_SrcFormats_mc<"v_mfma_f32_16x16x128f8f6f4",
949-
"_X128">;
949+
"_X128", mfma_f32_16x16x128_f8f6f4>;
950950
defm V_MFMA_F32_32X32X64_F8F6F4 : MAIInst_SrcFormats_mc<"v_mfma_f32_32x32x64f8f6f4",
951-
"_X512">;
951+
"_X512", mfma_f32_32x32x64_f8f6f4>;
952952

953953
defm V_MFMA_SCALE_F32_16X16X128_F8F6F4 : MAIInst_SrcFormats_Scaled_mc<
954954
"v_mfma_scale_f32_16x16x128_f8f6f4", "V_MFMA_F32_16X16X128_F8F6F4",

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0__cons
207207
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
208208
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
209209
; GCN-NEXT: s_nop 1
210-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0]
210+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3]
211211
; GCN-NEXT: s_nop 3
212212
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
213213
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -255,7 +255,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1__cons
255255
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
256256
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
257257
; GCN-NEXT: s_nop 1
258-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:1
258+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] blgp:1
259259
; GCN-NEXT: s_nop 3
260260
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
261261
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -303,7 +303,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2__cons
303303
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
304304
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
305305
; GCN-NEXT: s_nop 1
306-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:2
306+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:2
307307
; GCN-NEXT: s_nop 3
308308
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
309309
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -351,7 +351,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3__cons
351351
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
352352
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
353353
; GCN-NEXT: s_nop 1
354-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:3
354+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:3
355355
; GCN-NEXT: s_nop 3
356356
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
357357
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -399,7 +399,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4__cons
399399
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
400400
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
401401
; GCN-NEXT: s_nop 1
402-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:4
402+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] blgp:4
403403
; GCN-NEXT: s_nop 3
404404
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
405405
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -447,7 +447,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0__cons
447447
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
448448
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
449449
; GCN-NEXT: s_nop 1
450-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1
450+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1
451451
; GCN-NEXT: s_nop 3
452452
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
453453
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -496,7 +496,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1__cons
496496
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
497497
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
498498
; GCN-NEXT: s_nop 1
499-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:1
499+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 blgp:1
500500
; GCN-NEXT: s_nop 3
501501
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
502502
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -543,7 +543,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2__cons
543543
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
544544
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
545545
; GCN-NEXT: s_nop 1
546-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:2
546+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:2
547547
; GCN-NEXT: s_nop 3
548548
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
549549
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -591,7 +591,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3__cons
591591
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
592592
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
593593
; GCN-NEXT: s_nop 1
594-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:3
594+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:3
595595
; GCN-NEXT: s_nop 3
596596
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
597597
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -639,7 +639,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4__cons
639639
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
640640
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
641641
; GCN-NEXT: s_nop 1
642-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:4
642+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] cbsz:1 blgp:4
643643
; GCN-NEXT: s_nop 3
644644
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
645645
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -687,7 +687,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0__cons
687687
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
688688
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
689689
; GCN-NEXT: s_nop 1
690-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2
690+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2
691691
; GCN-NEXT: s_nop 3
692692
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
693693
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -735,7 +735,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1__cons
735735
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
736736
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
737737
; GCN-NEXT: s_nop 1
738-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:1
738+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 blgp:1
739739
; GCN-NEXT: s_nop 3
740740
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
741741
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -783,7 +783,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2__cons
783783
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
784784
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
785785
; GCN-NEXT: s_nop 1
786-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:2
786+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:2
787787
; GCN-NEXT: s_nop 3
788788
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
789789
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -831,7 +831,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3__cons
831831
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
832832
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
833833
; GCN-NEXT: s_nop 1
834-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:3
834+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:3
835835
; GCN-NEXT: s_nop 3
836836
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
837837
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -880,7 +880,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0__cons
880880
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
881881
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
882882
; GCN-NEXT: s_nop 1
883-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3
883+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3
884884
; GCN-NEXT: s_nop 3
885885
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
886886
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -928,7 +928,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1__cons
928928
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
929929
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
930930
; GCN-NEXT: s_nop 1
931-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:1
931+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 blgp:1
932932
; GCN-NEXT: s_nop 3
933933
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
934934
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -976,7 +976,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2__cons
976976
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
977977
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
978978
; GCN-NEXT: s_nop 1
979-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:2
979+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:2
980980
; GCN-NEXT: s_nop 3
981981
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
982982
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1024,7 +1024,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4__cons
10241024
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
10251025
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
10261026
; GCN-NEXT: s_nop 1
1027-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:4
1027+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:3 blgp:4
10281028
; GCN-NEXT: s_nop 3
10291029
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
10301030
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1072,7 +1072,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3__cons
10721072
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
10731073
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
10741074
; GCN-NEXT: s_nop 1
1075-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:3
1075+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:3
10761076
; GCN-NEXT: s_nop 3
10771077
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
10781078
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1120,7 +1120,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4__cons
11201120
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
11211121
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
11221122
; GCN-NEXT: s_nop 1
1123-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:4
1123+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:2 blgp:4
11241124
; GCN-NEXT: s_nop 3
11251125
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
11261126
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1168,7 +1168,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0__cons
11681168
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
11691169
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
11701170
; GCN-NEXT: s_nop 1
1171-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4
1171+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4
11721172
; GCN-NEXT: s_nop 3
11731173
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
11741174
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1216,7 +1216,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1__cons
12161216
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
12171217
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
12181218
; GCN-NEXT: s_nop 1
1219-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:1
1219+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 blgp:1
12201220
; GCN-NEXT: s_nop 3
12211221
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
12221222
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1264,7 +1264,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2__cons
12641264
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
12651265
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
12661266
; GCN-NEXT: s_nop 1
1267-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:2
1267+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:2
12681268
; GCN-NEXT: s_nop 3
12691269
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
12701270
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1312,7 +1312,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3__cons
13121312
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
13131313
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
13141314
; GCN-NEXT: s_nop 1
1315-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:3
1315+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:3
13161316
; GCN-NEXT: s_nop 3
13171317
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
13181318
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1360,7 +1360,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4__cons
13601360
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
13611361
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
13621362
; GCN-NEXT: s_nop 1
1363-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:4
1363+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:4 blgp:4
13641364
; GCN-NEXT: s_nop 3
13651365
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
13661366
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1976,7 +1976,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_a(
19761976
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
19771977
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
19781978
; GCN-NEXT: s_nop 1
1979-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0]
1979+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3]
19801980
; GCN-NEXT: s_nop 3
19811981
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
19821982
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1997,7 +1997,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_b(
19971997
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
19981998
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
19991999
; GCN-NEXT: s_nop 1
2000-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0]
2000+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3]
20012001
; GCN-NEXT: s_nop 3
20022002
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
20032003
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -2130,7 +2130,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6_
21302130
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
21312131
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
21322132
; GCN-NEXT: s_nop 1
2133-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:2
2133+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:2 blgp:2
21342134
; GCN-NEXT: s_nop 3
21352135
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
21362136
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -2268,7 +2268,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4_
22682268
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
22692269
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
22702270
; GCN-NEXT: s_nop 1
2271-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:4
2271+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:4 blgp:4
22722272
; GCN-NEXT: s_nop 3
22732273
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
22742274
; GCN-NEXT: v_accvgpr_read_b32 v1, a1

0 commit comments

Comments
 (0)