Skip to content

Commit e12cbd8

Browse files
authored
[AMDGPU] Fix scale opsel flags for scaled MFMA operations (#140183)
Fix for src scale opsel flags encoding and ASM parsing for gfx950 scaled MFMA.
1 parent d219a71 commit e12cbd8

File tree

7 files changed

+254
-78
lines changed

7 files changed

+254
-78
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1878,6 +1878,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
18781878

18791879
void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
18801880
OptionalImmIndexMap &OptionalIdx);
1881+
void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
18811882
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
18821883
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
18831884
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
@@ -6784,17 +6785,25 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
67846785
return ParseStatus::Success;
67856786
}
67866787

6787-
static void addOptionalImmOperand(
6788-
MCInst& Inst, const OperandVector& Operands,
6789-
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6790-
AMDGPUOperand::ImmTy ImmT,
6791-
int64_t Default = 0) {
6788+
static void
6789+
addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
6790+
AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
6791+
AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
6792+
std::optional<unsigned> InsertAt = std::nullopt) {
67926793
auto i = OptionalIdx.find(ImmT);
67936794
if (i != OptionalIdx.end()) {
67946795
unsigned Idx = i->second;
6795-
((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6796+
const AMDGPUOperand &Op =
6797+
static_cast<const AMDGPUOperand &>(*Operands[Idx]);
6798+
if (InsertAt)
6799+
Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
6800+
else
6801+
Op.addImmOperands(Inst, 1);
67966802
} else {
6797-
Inst.addOperand(MCOperand::createImm(Default));
6803+
if (InsertAt.has_value())
6804+
Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
6805+
else
6806+
Inst.addOperand(MCOperand::createImm(Default));
67986807
}
67996808
}
68006809

@@ -8811,6 +8820,70 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
88118820
Inst.getOperand(ModIdx).setImm(ModVal);
88128821
}
88138822
}
8823+
/// Convert the parsed operands of a scaled MFMA instruction into the operands
/// of \p Inst.
///
/// Defs and source operands are appended in the order they were parsed, while
/// immediate modifiers are only recorded by type. cbsz and blgp are then
/// inserted at the operand positions named by the opcode, and two modifier
/// operands are appended and filled with the op_sel / op_sel_hi bits.
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  // Parsed operands are consumed starting at index 1.
  // NOTE(review): Operands[0] is presumably the mnemonic token - confirm.
  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);

    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      // Remember where the modifier was parsed; it is materialized below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, 1);
    }
  }

  // Insert CBSZ and BLGP operands for F8F6F4 variants. A missing named operand
  // yields a negative index, which must not reach the unsigned insert position.
  int InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
  assert(InsertPos >= 0 && "scaled MFMA is expected to have a cbsz operand");
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCBSZ,
                        0, InsertPos);
  InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  assert(InsertPos >= 0 && "scaled MFMA is expected to have a blgp operand");
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyBLGP,
                        0, InsertPos);

  // Add dummy src_modifiers; the real values are written through the named
  // src0_modifiers / src1_modifiers indices below.
  Inst.addOperand(MCOperand::createImm(0));
  Inst.addOperand(MCOperand::createImm(0));

  // Returns the parsed value of an optional immediate, or 0 if it was omitted.
  auto GetOptionalImm = [&](AMDGPUOperand::ImmTy Ty) -> unsigned {
    auto It = OptionalIdx.find(Ty);
    if (It == OptionalIdx.end())
      return 0;
    return static_cast<const AMDGPUOperand &>(*Operands[It->second]).getImm();
  };

  // Handle op_sel fields.
  const unsigned OpSel = GetOptionalImm(AMDGPUOperand::ImmTyOpSel);
  const unsigned OpSelHi = GetOptionalImm(AMDGPUOperand::ImmTyOpSelHi);

  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};

  // Bit J of op_sel / op_sel_hi is folded into the J-th modifier operand.
  unsigned Bit = 1;
  for (AMDGPU::OpName ModName : ModOps) {
    unsigned ModVal = 0;
    if (OpSel & Bit)
      ModVal |= SISrcMods::OP_SEL_0;
    if (OpSelHi & Bit)
      ModVal |= SISrcMods::OP_SEL_1;

    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModName);
    assert(ModIdx >= 0 && static_cast<unsigned>(ModIdx) < Inst.getNumOperands() &&
           "scaled MFMA modifier operand index is out of range");
    Inst.getOperand(ModIdx).setImm(ModVal);
    Bit <<= 1;
  }
}
88148887

88158888
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
88168889
OptionalImmIndexMap &OptionalIdx) {

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -829,12 +829,12 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo ps, string F8F8Op> :
829829
// Currently assumes scaled instructions never have abid
830830
class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag <
831831
!if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp,
832-
node:$scale_src0_opsel, node:$scale_src0,
833-
node:$scale_src1_opsel, node:$scale_src1),
832+
node:$src0_modifiers, node:$scale_src0,
833+
node:$src1_modifiers, node:$scale_src1),
834834
!con((ops node:$src0, node:$src1, node:$src2, node:$cbsz),
835835
!if(HasAbid, (ops node:$abid), (ops)),
836836
(ops node:$blgp))),
837-
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $scale_src0_opsel, $scale_src0, $scale_src1_opsel, $scale_src1),
837+
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1),
838838
!if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
839839
(Op $src0, $src1, $src2, $cbsz, $blgp))),
840840
pred
@@ -895,12 +895,12 @@ class ScaledMAIInst<string OpName, MAIInst BaseInst, SDPatternOperator node> :
895895
let InOperandList = !con(BaseInst.InOperandList,
896896
(ins VSrc_b32:$scale_src0,
897897
VSrc_b32:$scale_src1,
898-
op_sel0:$scale_src0_opsel,
899-
op_sel_hi0:$scale_src1_opsel));
898+
op_sel0:$src0_modifiers,
899+
op_sel_hi0:$src1_modifiers));
900900
let AsmOperands =
901901
"$vdst, $src0, $src1, $src2, $scale_src0, $scale_src1"
902-
"$scale_src0_opsel$scale_src1_opsel$cbsz$blgp";
903-
902+
"$src0_modifiers$src1_modifiers$cbsz$blgp";
903+
let AsmMatchConverter = "cvtScaledMFMA";
904904
let FixedSize = 1;
905905
let Size = 16;
906906
}
@@ -2041,7 +2041,6 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
20412041
defvar PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64");
20422042
defvar Name = PS_ACD.Mnemonic;
20432043
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
2044-
20452044
let SubtargetPredicate = HasGFX950Insts,
20462045
DecoderNamespace = "GFX940",
20472046
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -526,14 +526,16 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
526526
bits<9> scale_src0;
527527
bits<9> scale_src1;
528528

529-
bits<2> scale_src0_opsel;
530-
bits<2> scale_src1_opsel;
529+
// MFMALdScaleModifierOp transforms the 2-bit opsel input into a 4-bit value
530+
// where opsel and opsel_hi are held in bits 2 and 3, respectively.
531+
bits<4> src0_modifiers;
532+
bits<4> src1_modifiers;
531533

532534
// Inst{7-0} = unused
533535
// Inst{10-8} = neg_hi;
534536
// Inst{13-11} = op_sel
535-
let Inst{11} = scale_src0_opsel{0};
536-
let Inst{12} = scale_src1_opsel{0};
537+
let Inst{11} = src0_modifiers{2}; //opsel[0]
538+
let Inst{12} = src1_modifiers{2}; //opsel[1]
537539
// Inst{13} = unused op_sel
538540
// Inst{14} = unused op_sel_hi2
539541

@@ -542,8 +544,8 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_
542544
let Inst{49-41} = scale_src1;
543545
// Inst{50-58} = unused
544546
// Inst{60-59} = op_sel_hi;
545-
let Inst{59} = scale_src0_opsel{1};
546-
let Inst{60} = scale_src1_opsel{1};
547+
let Inst{59} = src0_modifiers{3}; //opsel_hi[0]
548+
let Inst{60} = src1_modifiers{3}; //opsel_hi[1]
547549
// Inst{63-61} = neg;
548550

549551
// The high half of the encoding is the unscaled mfma op.
@@ -1437,17 +1439,17 @@ class getVOP3MAIScaledPat<VOPProfile P, SDPatternOperator node> {
14371439
// mfma
14381440
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
14391441
timm:$cbsz, timm:$blgp,
1440-
MFMALdScaleModifierOp:$scale_src0_opsel,
1442+
MFMALdScaleModifierOp:$src0_modifiers,
14411443
i32:$scale_src0,
1442-
MFMALdScaleModifierOp:$scale_src1_opsel,
1444+
MFMALdScaleModifierOp:$src1_modifiers,
14431445
i32:$scale_src1
14441446
))],
14451447
// smfmac
14461448
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx,
14471449
timm:$cbsz, timm:$abid,
1448-
MFMALdScaleModifierOp:$scale_src0_opsel,
1450+
MFMALdScaleModifierOp:$src0_modifiers,
14491451
i32:$scale_src0,
1450-
MFMALdScaleModifierOp:$scale_src1_opsel,
1452+
MFMALdScaleModifierOp:$src1_modifiers,
14511453
i32:$scale_src1))]);
14521454
}
14531455

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
4646
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
4747
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
4848
; GCN-NEXT: s_nop 1
49-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
49+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
5050
; GCN-NEXT: s_nop 7
5151
; GCN-NEXT: s_nop 3
5252
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -70,7 +70,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
7070
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
7171
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
7272
; GCN-NEXT: s_nop 1
73-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
73+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
7474
; GCN-NEXT: s_nop 7
7575
; GCN-NEXT: s_nop 3
7676
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -94,7 +94,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
9494
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
9595
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
9696
; GCN-NEXT: s_nop 1
97-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
97+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
9898
; GCN-NEXT: s_nop 7
9999
; GCN-NEXT: s_nop 3
100100
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -118,7 +118,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
118118
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
119119
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
120120
; GCN-NEXT: s_nop 1
121-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
121+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
122122
; GCN-NEXT: s_nop 7
123123
; GCN-NEXT: s_nop 3
124124
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -142,7 +142,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
142142
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
143143
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
144144
; GCN-NEXT: s_nop 1
145-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
145+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
146146
; GCN-NEXT: s_nop 7
147147
; GCN-NEXT: s_nop 3
148148
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -166,7 +166,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
166166
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
167167
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
168168
; GCN-NEXT: s_nop 1
169-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
169+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
170170
; GCN-NEXT: s_nop 7
171171
; GCN-NEXT: s_nop 3
172172
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -190,7 +190,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
190190
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
191191
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
192192
; GCN-NEXT: s_nop 1
193-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
193+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
194194
; GCN-NEXT: s_nop 7
195195
; GCN-NEXT: s_nop 3
196196
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1797,7 +1797,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
17971797
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
17981798
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
17991799
; GCN-NEXT: s_nop 1
1800-
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
1800+
; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
18011801
; GCN-NEXT: s_nop 7
18021802
; GCN-NEXT: s_nop 3
18031803
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1819,7 +1819,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18191819
; SDAG-NEXT: v_accvgpr_write_b32 a2, v18
18201820
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
18211821
; SDAG-NEXT: s_nop 1
1822-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
1822+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
18231823
; SDAG-NEXT: s_nop 7
18241824
; SDAG-NEXT: s_nop 3
18251825
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1837,7 +1837,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18371837
; GISEL-NEXT: v_accvgpr_write_b32 a3, v19
18381838
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
18391839
; GISEL-NEXT: s_nop 1
1840-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
1840+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
18411841
; GISEL-NEXT: s_nop 7
18421842
; GISEL-NEXT: s_nop 3
18431843
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1860,7 +1860,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18601860
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
18611861
; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d
18621862
; SDAG-NEXT: s_nop 1
1863-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
1863+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
18641864
; SDAG-NEXT: s_nop 7
18651865
; SDAG-NEXT: s_nop 3
18661866
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1879,7 +1879,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18791879
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
18801880
; GISEL-NEXT: v_mov_b32_e32 v17, 0x4d
18811881
; GISEL-NEXT: s_nop 1
1882-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
1882+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
18831883
; GISEL-NEXT: s_nop 7
18841884
; GISEL-NEXT: s_nop 3
18851885
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1921,7 +1921,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
19211921
; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
19221922
; SDAG-NEXT: v_mov_b32_e32 v17, s13
19231923
; SDAG-NEXT: s_nop 1
1924-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2
1924+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
19251925
; SDAG-NEXT: s_nop 7
19261926
; SDAG-NEXT: s_nop 3
19271927
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15]
@@ -1946,7 +1946,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
19461946
; GISEL-NEXT: v_accvgpr_write_b32 a3, s27
19471947
; GISEL-NEXT: v_mov_b32_e32 v16, s29
19481948
; GISEL-NEXT: s_nop 1
1949-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2
1949+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
19501950
; GISEL-NEXT: v_mov_b32_e32 v0, 0
19511951
; GISEL-NEXT: s_nop 7
19521952
; GISEL-NEXT: s_nop 2
@@ -1987,7 +1987,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
19871987
; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
19881988
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
19891989
; SDAG-NEXT: s_nop 1
1990-
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0]
1990+
; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
19911991
; SDAG-NEXT: s_nop 7
19921992
; SDAG-NEXT: s_nop 3
19931993
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5]
@@ -2013,7 +2013,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
20132013
; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
20142014
; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
20152015
; GISEL-NEXT: s_nop 1
2016-
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
2016+
; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
20172017
; GISEL-NEXT: v_mov_b32_e32 v0, 0
20182018
; GISEL-NEXT: s_nop 7
20192019
; GISEL-NEXT: s_nop 2

0 commit comments

Comments
 (0)