Skip to content

Commit 3fee36e

Browse files
arsenm and pravinjagtap
authored and committed
AMDGPU: Optimize mfma_scale intrinsics with 0 inputs (llvm#116724)
We can use the unscaled form of the instruction if we know the scale factors are both 0.
1 parent 2a450e7 commit 3fee36e

File tree

4 files changed

+123
-106
lines changed

4 files changed

+123
-106
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,19 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
304304
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
305305
>;
306306

307+
308+
// Optimize v_mfma_scale* instructions to avoid the scale if the
309+
// scales are known 0.
310+
class UnscaledMFMAOptimizationPat<SDPatternOperator intrin> : PatFrag<
311+
(ops node:$srca, node:$srcb, node:$srcc,
312+
node:$cbsz, node:$blgp),
313+
(intrin $srca, $srcb, $srcc, $cbsz, $blgp,
314+
srcvalue, 0, srcvalue, 0)
315+
>;
316+
317+
def mfma_f32_16x16x128_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_16x16x128_f8f6f4>;
318+
def mfma_f32_32x32x64_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_32x32x64_f8f6f4>;
319+
307320
//===----------------------------------------------------------------------===//
308321
// ValueType helpers
309322
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -894,7 +894,7 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
894894

895895
// Each of SrcA and SrcB can be encoded using 3 different sizes, so
896896
// define 9 permutations of register classes.
897-
multiclass MAIInst_SrcFormats_mc<string OpName, string ProfileSuffix, SDPatternOperator node = null_frag> {
897+
multiclass MAIInst_SrcFormats_mc<string OpName, string ProfileSuffix, SDPatternOperator node> {
898898
defvar HasAbid = false;
899899
defm _f8_f8 : MAIInst<OpName, "F32_V8I32_V8I32"#ProfileSuffix, node, HasAbid>;
900900
defm _f8_f6 : MAIInst<OpName, "F32_V8I32_V6I32"#ProfileSuffix, node, HasAbid>;
@@ -954,9 +954,9 @@ defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16
954954
defm V_MFMA_F32_32X32X16_BF16 : MAIInst<"v_mfma_f32_32x32x16bf16", "F32_V8BF16_X16", int_amdgcn_mfma_f32_32x32x16_bf16>;
955955

956956
defm V_MFMA_F32_16X16X128_F8F6F4 : MAIInst_SrcFormats_mc<"v_mfma_f32_16x16x128f8f6f4",
957-
"_X128">;
957+
"_X128", mfma_f32_16x16x128_f8f6f4>;
958958
defm V_MFMA_F32_32X32X64_F8F6F4 : MAIInst_SrcFormats_mc<"v_mfma_f32_32x32x64f8f6f4",
959-
"_X512">;
959+
"_X512", mfma_f32_32x32x64_f8f6f4>;
960960

961961
defm V_MFMA_SCALE_F32_16X16X128_F8F6F4 : MAIInst_SrcFormats_Scaled_mc<
962962
"v_mfma_scale_f32_16x16x128_f8f6f4", "V_MFMA_F32_16X16X128_F8F6F4",

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0__cons
207207
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
208208
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
209209
; GCN-NEXT: s_nop 1
210-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0]
210+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3]
211211
; GCN-NEXT: s_nop 3
212212
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
213213
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -255,7 +255,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1__cons
255255
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
256256
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
257257
; GCN-NEXT: s_nop 1
258-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:1
258+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] blgp:1
259259
; GCN-NEXT: s_nop 3
260260
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
261261
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -303,7 +303,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2__cons
303303
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
304304
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
305305
; GCN-NEXT: s_nop 1
306-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:2
306+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:2
307307
; GCN-NEXT: s_nop 3
308308
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
309309
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -351,7 +351,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3__cons
351351
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
352352
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
353353
; GCN-NEXT: s_nop 1
354-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:3
354+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:3
355355
; GCN-NEXT: s_nop 3
356356
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
357357
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -399,7 +399,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4__cons
399399
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
400400
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
401401
; GCN-NEXT: s_nop 1
402-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] blgp:4
402+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] blgp:4
403403
; GCN-NEXT: s_nop 3
404404
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
405405
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -447,7 +447,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0__cons
447447
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
448448
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
449449
; GCN-NEXT: s_nop 1
450-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1
450+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1
451451
; GCN-NEXT: s_nop 3
452452
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
453453
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -496,7 +496,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1__cons
496496
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
497497
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
498498
; GCN-NEXT: s_nop 1
499-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:1
499+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 blgp:1
500500
; GCN-NEXT: s_nop 3
501501
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
502502
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -543,7 +543,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2__cons
543543
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
544544
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
545545
; GCN-NEXT: s_nop 1
546-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:2
546+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:2
547547
; GCN-NEXT: s_nop 3
548548
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
549549
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -591,7 +591,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3__cons
591591
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
592592
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
593593
; GCN-NEXT: s_nop 1
594-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:3
594+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:3
595595
; GCN-NEXT: s_nop 3
596596
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
597597
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -639,7 +639,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4__cons
639639
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
640640
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
641641
; GCN-NEXT: s_nop 1
642-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:1 blgp:4
642+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] cbsz:1 blgp:4
643643
; GCN-NEXT: s_nop 3
644644
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
645645
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -687,7 +687,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0__cons
687687
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
688688
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
689689
; GCN-NEXT: s_nop 1
690-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2
690+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2
691691
; GCN-NEXT: s_nop 3
692692
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
693693
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -735,7 +735,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1__cons
735735
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
736736
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
737737
; GCN-NEXT: s_nop 1
738-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:1
738+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 blgp:1
739739
; GCN-NEXT: s_nop 3
740740
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
741741
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -783,7 +783,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2__cons
783783
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
784784
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
785785
; GCN-NEXT: s_nop 1
786-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:2
786+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:2
787787
; GCN-NEXT: s_nop 3
788788
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
789789
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -831,7 +831,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3__cons
831831
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
832832
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
833833
; GCN-NEXT: s_nop 1
834-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:3
834+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:3
835835
; GCN-NEXT: s_nop 3
836836
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
837837
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -880,7 +880,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0__cons
880880
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
881881
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
882882
; GCN-NEXT: s_nop 1
883-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3
883+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3
884884
; GCN-NEXT: s_nop 3
885885
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
886886
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -928,7 +928,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1__cons
928928
; GCN-NEXT: v_accvgpr_write_b32 a2, v16
929929
; GCN-NEXT: v_accvgpr_write_b32 a3, v17
930930
; GCN-NEXT: s_nop 1
931-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:1
931+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 blgp:1
932932
; GCN-NEXT: s_nop 3
933933
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
934934
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -976,7 +976,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2__cons
976976
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
977977
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
978978
; GCN-NEXT: s_nop 1
979-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:2
979+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:2
980980
; GCN-NEXT: s_nop 3
981981
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
982982
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1024,7 +1024,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4__cons
10241024
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
10251025
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
10261026
; GCN-NEXT: s_nop 1
1027-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:4
1027+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:3 blgp:4
10281028
; GCN-NEXT: s_nop 3
10291029
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
10301030
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1072,7 +1072,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3__cons
10721072
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
10731073
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
10741074
; GCN-NEXT: s_nop 1
1075-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:3 blgp:3
1075+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:3
10761076
; GCN-NEXT: s_nop 3
10771077
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
10781078
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1120,7 +1120,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4__cons
11201120
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
11211121
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
11221122
; GCN-NEXT: s_nop 1
1123-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:4
1123+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:2 blgp:4
11241124
; GCN-NEXT: s_nop 3
11251125
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
11261126
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1168,7 +1168,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0__cons
11681168
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
11691169
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
11701170
; GCN-NEXT: s_nop 1
1171-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4
1171+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4
11721172
; GCN-NEXT: s_nop 3
11731173
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
11741174
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1216,7 +1216,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1__cons
12161216
; GCN-NEXT: v_accvgpr_write_b32 a2, v14
12171217
; GCN-NEXT: v_accvgpr_write_b32 a3, v15
12181218
; GCN-NEXT: s_nop 1
1219-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:1
1219+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 blgp:1
12201220
; GCN-NEXT: s_nop 3
12211221
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
12221222
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1264,7 +1264,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2__cons
12641264
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
12651265
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
12661266
; GCN-NEXT: s_nop 1
1267-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:2
1267+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:2
12681268
; GCN-NEXT: s_nop 3
12691269
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
12701270
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1312,7 +1312,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3__cons
13121312
; GCN-NEXT: v_accvgpr_write_b32 a2, v12
13131313
; GCN-NEXT: v_accvgpr_write_b32 a3, v13
13141314
; GCN-NEXT: s_nop 1
1315-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:3
1315+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:3
13161316
; GCN-NEXT: s_nop 3
13171317
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
13181318
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1360,7 +1360,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4__cons
13601360
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
13611361
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
13621362
; GCN-NEXT: s_nop 1
1363-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:4
1363+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:4 blgp:4
13641364
; GCN-NEXT: s_nop 3
13651365
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
13661366
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1943,7 +1943,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_a(
19431943
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
19441944
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
19451945
; GCN-NEXT: s_nop 1
1946-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0]
1946+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3]
19471947
; GCN-NEXT: s_nop 3
19481948
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
19491949
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -1964,7 +1964,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_b(
19641964
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
19651965
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
19661966
; GCN-NEXT: s_nop 1
1967-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0]
1967+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3]
19681968
; GCN-NEXT: s_nop 3
19691969
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
19701970
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -2097,7 +2097,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6_
20972097
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
20982098
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
20992099
; GCN-NEXT: s_nop 1
2100-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:2 blgp:2
2100+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:2 blgp:2
21012101
; GCN-NEXT: s_nop 3
21022102
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
21032103
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
@@ -2235,7 +2235,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4_
22352235
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
22362236
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
22372237
; GCN-NEXT: s_nop 1
2238-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 0 op_sel_hi:[0,0,0] cbsz:4 blgp:4
2238+
; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:4 blgp:4
22392239
; GCN-NEXT: s_nop 3
22402240
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
22412241
; GCN-NEXT: v_accvgpr_read_b32 v1, a1

0 commit comments

Comments
 (0)