Skip to content

Commit a06c2c8

Browse files
bcahoon and VigneshwarJ authored
[AMDGPU] Fix scale opsel flags for scaled MFMA operations (llvm#140183) (llvm#2466)
Fix the encoding and assembly parsing of the scale-source op_sel flags for gfx950 scaled MFMA instructions. (cherry picked from commit e12cbd8) Co-authored-by: Vigneshwar Jayakumar <[email protected]>
1 parent ddde5bd commit a06c2c8

File tree

7 files changed

+254
-78
lines changed

7 files changed

+254
-78
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
18721872

18731873
void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
18741874
OptionalImmIndexMap &OptionalIdx);
1875+
void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
18751876
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
18761877
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
18771878
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
@@ -6765,17 +6766,25 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
67656766
return ParseStatus::Success;
67666767
}
67676768

6768-
static void addOptionalImmOperand(
6769-
MCInst& Inst, const OperandVector& Operands,
6770-
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6771-
AMDGPUOperand::ImmTy ImmT,
6772-
int64_t Default = 0) {
6769+
static void
6770+
addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
6771+
AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
6772+
AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
6773+
std::optional<unsigned> InsertAt = std::nullopt) {
67736774
auto i = OptionalIdx.find(ImmT);
67746775
if (i != OptionalIdx.end()) {
67756776
unsigned Idx = i->second;
6776-
((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6777+
const AMDGPUOperand &Op =
6778+
static_cast<const AMDGPUOperand &>(*Operands[Idx]);
6779+
if (InsertAt)
6780+
Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
6781+
else
6782+
Op.addImmOperands(Inst, 1);
67776783
} else {
6778-
Inst.addOperand(MCOperand::createImm(Default));
6784+
if (InsertAt.has_value())
6785+
Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
6786+
else
6787+
Inst.addOperand(MCOperand::createImm(Default));
67796788
}
67806789
}
67816790

@@ -8794,6 +8803,70 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
87948803
Inst.getOperand(ModIdx).setImm(ModVal);
87958804
}
87968805
}
8806+
// Asm-match converter for scaled MFMA instructions: builds the MCInst operand
// list from the parsed operands.
//
// Steps, in order:
//   1. Add the defs as register operands.
//   2. Add the remaining explicit operands, recording optional immediate
//      modifiers by type instead of adding them right away.
//   3. Insert cbsz and blgp at their named operand positions (0 if omitted).
//   4. Append two zero immediates, then fold the op_sel / op_sel_hi bits into
//      the src0_modifiers and src1_modifiers operands.
//
// \param Inst     Instruction being built; its opcode is already set.
// \param Operands Parsed operands produced by the assembly parser.
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
8807+
const OperandVector &Operands) {
8808+
// Maps each optional immediate modifier type to its index in Operands.
OptionalImmIndexMap OptionalIdx;
8809+
unsigned Opc = Inst.getOpcode();
8810+
// Scanning starts at index 1.
// NOTE(review): Operands[0] is presumably the mnemonic token - confirm.
unsigned I = 1;
8811+
8812+
const MCInstrDesc &Desc = MII.get(Opc);
8813+
8814+
// Each def is added as a single register operand.
for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
8815+
static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
8816+
8817+
// Walk the remaining parsed operands. The operand slot about to be filled
// (Inst.getNumOperands()) decides how the parsed operand is added.
for (unsigned E = Operands.size(); I != E; ++I) {
8818+
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
8819+
8820+
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8821+
// Source with input modifiers: adds two MCInst operands.
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8822+
} else if (Op.isImmModifier()) {
8823+
// Optional modifier: only remember where it was parsed; it is
// materialized after the loop.
OptionalIdx[Op.getImmTy()] = I;
8824+
} else {
8825+
Op.addRegOrImmOperands(Inst, 1);
8826+
}
8827+
}
8828+
8829+
// Insert CBSZ and BLGP operands for F8F6F4 variants
// They are inserted at their named operand index rather than appended, and
// default to 0 when not written in the assembly.
// NOTE(review): InsertPos is an int passed where addOptionalImmOperand takes
// std::optional<unsigned>; if the lookup can return a negative index for an
// opcode without cbsz/blgp, it would become a huge insert position - confirm
// every instruction using this converter defines both operands.
8830+
int InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
8831+
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCBSZ,
8832+
0, InsertPos);
8833+
InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
8834+
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyBLGP,
8835+
0, InsertPos);
8836+
8837+
// Add dummy src_modifiers
// Two zero immediates are appended here; presumably these are the
// src0_modifiers / src1_modifiers slots that are overwritten below.
8838+
Inst.addOperand(MCOperand::createImm(0));
8839+
Inst.addOperand(MCOperand::createImm(0));
8840+
8841+
// Handle op_sel fields
8842+
8843+
// op_sel bits as parsed, or 0 when the modifier was not specified.
unsigned OpSel = 0;
8844+
auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
8845+
if (OpselIdx != OptionalIdx.end()) {
8846+
OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
8847+
.getImm();
8848+
}
8849+
8850+
// op_sel_hi bits as parsed, or 0 when the modifier was not specified.
unsigned OpSelHi = 0;
8851+
auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
8852+
if (OpselHiIdx != OptionalIdx.end()) {
8853+
OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
8854+
.getImm();
8855+
}
8856+
const int16_t ModOps[] = {AMDGPU::OpName::src0_modifiers,
8857+
AMDGPU::OpName::src1_modifiers};
8858+
8859+
// For source J, bit J of op_sel sets OP_SEL_0 and bit J of op_sel_hi sets
// OP_SEL_1 in that source's modifiers operand.
for (unsigned J = 0; J < 2; ++J) {
8860+
unsigned ModVal = 0;
8861+
if (OpSel & (1 << J))
8862+
ModVal |= SISrcMods::OP_SEL_0;
8863+
if (OpSelHi & (1 << J))
8864+
ModVal |= SISrcMods::OP_SEL_1;
8865+
8866+
const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8867+
// Overwrites the modifiers operand with the computed bits.
Inst.getOperand(ModIdx).setImm(ModVal);
8868+
}
8869+
}
87978870

87988871
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
87998872
OptionalImmIndexMap &OptionalIdx) {

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -785,12 +785,12 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo ps, string F8F8Op> :
785785
// Currently assumes scaled instructions never have abid
786786
class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag <
787787
!if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp,
788-
node:$scale_src0_opsel, node:$scale_src0,
789-
node:$scale_src1_opsel, node:$scale_src1),
788+
node:$src0_modifiers, node:$scale_src0,
789+
node:$src1_modifiers, node:$scale_src1),
790790
!con((ops node:$src0, node:$src1, node:$src2, node:$cbsz),
791791
!if(HasAbid, (ops node:$abid), (ops)),
792792
(ops node:$blgp))),
793-
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $scale_src0_opsel, $scale_src0, $scale_src1_opsel, $scale_src1),
793+
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1),
794794
!if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
795795
(Op $src0, $src1, $src2, $cbsz, $blgp))),
796796
pred
@@ -851,12 +851,12 @@ class ScaledMAIInst<string OpName, MAIInst BaseInst, SDPatternOperator node> :
851851
let InOperandList = !con(BaseInst.InOperandList,
852852
(ins VSrc_b32:$scale_src0,
853853
VSrc_b32:$scale_src1,
854-
op_sel0:$scale_src0_opsel,
855-
op_sel_hi0:$scale_src1_opsel));
854+
op_sel0:$src0_modifiers,
855+
op_sel_hi0:$src1_modifiers));
856856
let AsmOperands =
857857
"$vdst, $src0, $src1, $src2, $scale_src0, $scale_src1"
858-
"$scale_src0_opsel$scale_src1_opsel$cbsz$blgp";
859-
858+
"$src0_modifiers$src1_modifiers$cbsz$blgp";
859+
let AsmMatchConverter = "cvtScaledMFMA";
860860
let FixedSize = 1;
861861
let Size = 16;
862862
}
@@ -1997,7 +1997,6 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
19971997
defvar PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64");
19981998
defvar Name = PS_ACD.Mnemonic;
19991999
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
2000-
20012000
let SubtargetPredicate = HasGFX950Insts,
20022001
DecoderNamespace = "GFX940",
20032002
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -528,14 +528,16 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
528528
bits<9> scale_src0;
529529
bits<9> scale_src1;
530530

531-
bits<2> scale_src0_opsel;
532-
bits<2> scale_src1_opsel;
531+
// MFMALdScaleModifierOp transforms the 2-bit opsel input into a 4-bit value
532+
// where opsel and opsel_hi are in the 3rd and 4th bits (bits 2 and 3).
533+
bits<4> src0_modifiers;
534+
bits<4> src1_modifiers;
533535

534536
// Inst{7-0} = unused
535537
// Inst{10-8} = neg_hi;
536538
// Inst{13-11} = op_sel
537-
let Inst{11} = scale_src0_opsel{0};
538-
let Inst{12} = scale_src1_opsel{0};
539+
let Inst{11} = src0_modifiers{2}; //opsel[0]
540+
let Inst{12} = src1_modifiers{2}; //opsel[1]
539541
// Inst{13} = unused op_sel
540542
// Inst{14} = unused op_sel_hi2
541543

@@ -544,8 +546,8 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
544546
let Inst{49-41} = scale_src1;
545547
// Inst{58-50} = unused
546548
// Inst{60-59} = op_sel_hi;
547-
let Inst{59} = scale_src0_opsel{1};
548-
let Inst{60} = scale_src1_opsel{1};
549+
let Inst{59} = src0_modifiers{3}; //opsel_hi[0]
550+
let Inst{60} = src1_modifiers{3}; //opsel_hi[1]
549551
// Inst{63-61} = neg;
550552

551553
// The high half of the encoding is the unscaled mfma op.
@@ -1433,17 +1435,17 @@ class getVOP3MAIScaledPat<VOPProfile P, SDPatternOperator node> {
14331435
// mfma
14341436
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
14351437
timm:$cbsz, timm:$blgp,
1436-
MFMALdScaleModifierOp:$scale_src0_opsel,
1438+
MFMALdScaleModifierOp:$src0_modifiers,
14371439
i32:$scale_src0,
1438-
MFMALdScaleModifierOp:$scale_src1_opsel,
1440+
MFMALdScaleModifierOp:$src1_modifiers,
14391441
i32:$scale_src1
14401442
))],
14411443
// smfmac
14421444
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx,
14431445
timm:$cbsz, timm:$abid,
1444-
MFMALdScaleModifierOp:$scale_src0_opsel,
1446+
MFMALdScaleModifierOp:$src0_modifiers,
14451447
i32:$scale_src0,
1446-
MFMALdScaleModifierOp:$scale_src1_opsel,
1448+
MFMALdScaleModifierOp:$src1_modifiers,
14471449
i32:$scale_src1))]);
14481450
}
14491451

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
4848
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
4949
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
5050
; GCN-NEXT: s_nop 1
51-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
51+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
5252
; GCN-NEXT: s_nop 7
5353
; GCN-NEXT: s_nop 3
5454
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -72,7 +72,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
7272
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
7373
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
7474
; GCN-NEXT: s_nop 1
75-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
75+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
7676
; GCN-NEXT: s_nop 7
7777
; GCN-NEXT: s_nop 3
7878
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -96,7 +96,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
9696
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
9797
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
9898
; GCN-NEXT: s_nop 1
99-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
99+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
100100
; GCN-NEXT: s_nop 7
101101
; GCN-NEXT: s_nop 3
102102
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -120,7 +120,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
120120
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
121121
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
122122
; GCN-NEXT: s_nop 1
123-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
123+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
124124
; GCN-NEXT: s_nop 7
125125
; GCN-NEXT: s_nop 3
126126
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -144,7 +144,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
144144
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
145145
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
146146
; GCN-NEXT: s_nop 1
147-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
147+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
148148
; GCN-NEXT: s_nop 7
149149
; GCN-NEXT: s_nop 3
150150
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -168,7 +168,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
168168
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
169169
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
170170
; GCN-NEXT: s_nop 1
171-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
171+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
172172
; GCN-NEXT: s_nop 7
173173
; GCN-NEXT: s_nop 3
174174
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -192,7 +192,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
192192
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
193193
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
194194
; GCN-NEXT: s_nop 1
195-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
195+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
196196
; GCN-NEXT: s_nop 7
197197
; GCN-NEXT: s_nop 3
198198
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1803,7 +1803,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
18031803
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
18041804
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
18051805
; GCN-NEXT: s_nop 1
1806-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
1806+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
18071807
; GCN-NEXT: s_nop 7
18081808
; GCN-NEXT: s_nop 3
18091809
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1825,7 +1825,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18251825
; SDAG-NEXT: v_accvgpr_write_b32 a2, v18
18261826
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
18271827
; SDAG-NEXT: s_nop 1
1828-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
1828+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
18291829
; SDAG-NEXT: s_nop 7
18301830
; SDAG-NEXT: s_nop 3
18311831
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1843,7 +1843,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18431843
; GISEL-NEXT: v_accvgpr_write_b32 a3, v19
18441844
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
18451845
; GISEL-NEXT: s_nop 1
1846-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
1846+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
18471847
; GISEL-NEXT: s_nop 7
18481848
; GISEL-NEXT: s_nop 3
18491849
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1866,7 +1866,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18661866
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
18671867
; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d
18681868
; SDAG-NEXT: s_nop 1
1869-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
1869+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
18701870
; SDAG-NEXT: s_nop 7
18711871
; SDAG-NEXT: s_nop 3
18721872
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1885,7 +1885,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18851885
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
18861886
; GISEL-NEXT: v_mov_b32_e32 v17, 0x4d
18871887
; GISEL-NEXT: s_nop 1
1888-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
1888+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
18891889
; GISEL-NEXT: s_nop 7
18901890
; GISEL-NEXT: s_nop 3
18911891
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1927,7 +1927,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
19271927
; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
19281928
; SDAG-NEXT: v_mov_b32_e32 v17, s13
19291929
; SDAG-NEXT: s_nop 1
1930-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2
1930+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
19311931
; SDAG-NEXT: s_nop 7
19321932
; SDAG-NEXT: s_nop 3
19331933
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15]
@@ -1952,7 +1952,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
19521952
; GISEL-NEXT: v_accvgpr_write_b32 a3, s27
19531953
; GISEL-NEXT: v_mov_b32_e32 v16, s29
19541954
; GISEL-NEXT: s_nop 1
1955-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2
1955+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
19561956
; GISEL-NEXT: v_mov_b32_e32 v0, 0
19571957
; GISEL-NEXT: s_nop 7
19581958
; GISEL-NEXT: s_nop 2
@@ -1993,7 +1993,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
19931993
; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
19941994
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
19951995
; SDAG-NEXT: s_nop 1
1996-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0]
1996+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
19971997
; SDAG-NEXT: s_nop 7
19981998
; SDAG-NEXT: s_nop 3
19991999
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5]
@@ -2019,7 +2019,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
20192019
; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
20202020
; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
20212021
; GISEL-NEXT: s_nop 1
2022-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
2022+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
20232023
; GISEL-NEXT: v_mov_b32_e32 v0, 0
20242024
; GISEL-NEXT: s_nop 7
20252025
; GISEL-NEXT: s_nop 2

0 commit comments

Comments
 (0)