-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[AMDGPU] Fix opsel for scaled MFMA operations #140183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
f33f712
67a22cc
e73a9cb
08a5f93
87f8800
8e30e0a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1878,6 +1878,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { | |||||||||||||||
|
||||||||||||||||
void cvtVOP3(MCInst &Inst, const OperandVector &Operands, | ||||||||||||||||
OptionalImmIndexMap &OptionalIdx); | ||||||||||||||||
void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands); | ||||||||||||||||
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); | ||||||||||||||||
void cvtVOP3(MCInst &Inst, const OperandVector &Operands); | ||||||||||||||||
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); | ||||||||||||||||
|
@@ -6796,17 +6797,25 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { | |||||||||||||||
return ParseStatus::Success; | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
static void addOptionalImmOperand( | ||||||||||||||||
MCInst& Inst, const OperandVector& Operands, | ||||||||||||||||
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, | ||||||||||||||||
AMDGPUOperand::ImmTy ImmT, | ||||||||||||||||
int64_t Default = 0) { | ||||||||||||||||
static void | ||||||||||||||||
addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, | ||||||||||||||||
AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, | ||||||||||||||||
AMDGPUOperand::ImmTy ImmT, int64_t Default = 0, | ||||||||||||||||
std::optional<unsigned> InsertAt = std::nullopt) { | ||||||||||||||||
auto i = OptionalIdx.find(ImmT); | ||||||||||||||||
if (i != OptionalIdx.end()) { | ||||||||||||||||
unsigned Idx = i->second; | ||||||||||||||||
((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); | ||||||||||||||||
const AMDGPUOperand &Op = | ||||||||||||||||
static_cast<const AMDGPUOperand &>(*Operands[Idx]); | ||||||||||||||||
if (InsertAt) | ||||||||||||||||
Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm())); | ||||||||||||||||
else | ||||||||||||||||
Op.addImmOperands(Inst, 1); | ||||||||||||||||
} else { | ||||||||||||||||
Inst.addOperand(MCOperand::createImm(Default)); | ||||||||||||||||
if (InsertAt.has_value()) | ||||||||||||||||
Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default)); | ||||||||||||||||
else | ||||||||||||||||
Inst.addOperand(MCOperand::createImm(Default)); | ||||||||||||||||
} | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
|
@@ -8823,6 +8832,68 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) | |||||||||||||||
Inst.getOperand(ModIdx).setImm(ModVal); | ||||||||||||||||
} | ||||||||||||||||
} | ||||||||||||||||
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst, | ||||||||||||||||
const OperandVector &Operands) { | ||||||||||||||||
OptionalImmIndexMap OptionalIdx; | ||||||||||||||||
unsigned Opc = Inst.getOpcode(); | ||||||||||||||||
unsigned I = 1; | ||||||||||||||||
|
||||||||||||||||
const MCInstrDesc &Desc = MII.get(Opc); | ||||||||||||||||
|
||||||||||||||||
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) | ||||||||||||||||
static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1); | ||||||||||||||||
|
||||||||||||||||
for (unsigned E = Operands.size(); I != E; ++I) { | ||||||||||||||||
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]); | ||||||||||||||||
|
||||||||||||||||
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { | ||||||||||||||||
Op.addRegOrImmWithFPInputModsOperands(Inst, 2); | ||||||||||||||||
} else if (Op.isImmModifier()) { | ||||||||||||||||
OptionalIdx[Op.getImmTy()] = I; | ||||||||||||||||
} else { | ||||||||||||||||
Op.addRegOrImmOperands(Inst, 1); | ||||||||||||||||
} | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
// Insert CBSZ and BLGP operands for F8F6F4 variants | ||||||||||||||||
int InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz); | ||||||||||||||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCBSZ, | ||||||||||||||||
0, InsertPos); | ||||||||||||||||
InsertPos = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); | ||||||||||||||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyBLGP, | ||||||||||||||||
0, InsertPos); | ||||||||||||||||
|
||||||||||||||||
// Add dummy src_modifiers | ||||||||||||||||
Inst.addOperand(MCOperand::createImm(0)); | ||||||||||||||||
Inst.addOperand(MCOperand::createImm(0)); | ||||||||||||||||
|
||||||||||||||||
// Handle op_sel fields | ||||||||||||||||
|
||||||||||||||||
unsigned OpSel = 0; | ||||||||||||||||
auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel); | ||||||||||||||||
if (OpselIdx != OptionalIdx.end()) | ||||||||||||||||
OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second]) | ||||||||||||||||
.getImm(); | ||||||||||||||||
|
||||||||||||||||
unsigned OpSelHi = 0; | ||||||||||||||||
auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi); | ||||||||||||||||
if (OpselHiIdx != OptionalIdx.end()) | ||||||||||||||||
OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second]) | ||||||||||||||||
.getImm(); | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||
static const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, | ||||||||||||||||
AMDGPU::OpName::src1_modifiers}; | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||
|
||||||||||||||||
for (unsigned J = 0; J < 2; ++J) { | ||||||||||||||||
unsigned ModVal = 0; | ||||||||||||||||
if (OpSel & (1 << J)) | ||||||||||||||||
ModVal |= SISrcMods::OP_SEL_0; | ||||||||||||||||
if (OpSelHi & (1 << J)) | ||||||||||||||||
ModVal |= SISrcMods::OP_SEL_1; | ||||||||||||||||
|
||||||||||||||||
const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); | ||||||||||||||||
Inst.getOperand(ModIdx).setImm(ModVal); | ||||||||||||||||
} | ||||||||||||||||
} | ||||||||||||||||
|
||||||||||||||||
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, | ||||||||||||||||
OptionalImmIndexMap &OptionalIdx) { | ||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -829,12 +829,12 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo ps, string F8F8Op> : | |
// Currently assumes scaled instructions never have abid | ||
class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag < | ||
!if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp, | ||
node:$scale_src0_opsel, node:$scale_src0, | ||
node:$scale_src1_opsel, node:$scale_src1), | ||
node:$src0_modifiers, node:$scale_src0, | ||
node:$src1_modifiers, node:$scale_src1), | ||
!con((ops node:$src0, node:$src1, node:$src2, node:$cbsz), | ||
!if(HasAbid, (ops node:$abid), (ops)), | ||
(ops node:$blgp))), | ||
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $scale_src0_opsel, $scale_src0, $scale_src1_opsel, $scale_src1), | ||
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1), | ||
!if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp), | ||
(Op $src0, $src1, $src2, $cbsz, $blgp))), | ||
pred | ||
|
@@ -895,12 +895,12 @@ class ScaledMAIInst<string OpName, MAIInst BaseInst, SDPatternOperator node> : | |
let InOperandList = !con(BaseInst.InOperandList, | ||
(ins VSrc_b32:$scale_src0, | ||
VSrc_b32:$scale_src1, | ||
op_sel0:$scale_src0_opsel, | ||
op_sel_hi0:$scale_src1_opsel)); | ||
op_sel0:$src0_modifiers, | ||
op_sel_hi0:$src1_modifiers)); | ||
let AsmOperands = | ||
"$vdst, $src0, $src1, $src2, $scale_src0, $scale_src1" | ||
"$scale_src0_opsel$scale_src1_opsel$cbsz$blgp"; | ||
|
||
"$src0_modifiers$src1_modifiers$cbsz$blgp"; | ||
let AsmMatchConverter = "cvtScaledMFMA"; | ||
let FixedSize = 1; | ||
let Size = 16; | ||
} | ||
|
@@ -2041,7 +2041,6 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> { | |
defvar PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64"); | ||
defvar Name = PS_ACD.Mnemonic; | ||
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8"; | ||
|
||
let SubtargetPredicate = HasGFX950Insts, | ||
DecoderNamespace = "GFX940", | ||
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in { | ||
|
@@ -2057,7 +2056,7 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> { | |
|
||
multiclass VOP3PX_Real_ScaledMFMA_F8F6F4_mc<bits<7> op> { | ||
defm _f8_f8 : VOP3PX_Real_ScaledMFMA<op>; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Introduced trailing whitespace |
||
let isAsmParserOnly = 1 in { // Disable ambiguous disassembly. | ||
defm _f8_f6 : VOP3PX_Real_ScaledMFMA<op>; | ||
defm _f6_f8 : VOP3PX_Real_ScaledMFMA<op>; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -526,14 +526,14 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_ | |
bits<9> scale_src0; | ||
bits<9> scale_src1; | ||
|
||
bits<2> scale_src0_opsel; | ||
bits<2> scale_src1_opsel; | ||
bits<4> src0_modifiers; | ||
bits<4> src1_modifiers; | ||
Comment on lines
+531
to
+532
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Needs comment explaining these. If you're writing custom operand handling maybe we shouldn't rename these? It is a weird case There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought so too, but this matches the current src_modifiers field and uses an existing bit mask 4 bit value (OpselHI, OpselLo, Abs and Neg) in TableGen. There is also already a logic in place for asm printing using this bit mask. Also, in previous revision, it was a 2-bit field, which was reading the lower 2 bits hence not generating the right encoding for opsel bits. Though this MFMA instruction doesn't use neg and abs, I thought it would be cleaner to match the existing implementation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it does support neg, it's just not wired up There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh I see it, I see this in doc " NEG[1:0] and ABS[1:0] must be zero. NEG[2] and ABS[2] may be used to control matrix C" . So I will wire up a src2_modifier with neg/abs bits mapped and add some tests for it as well |
||
|
||
// Inst{7-0} = unused | ||
// Inst{10-8} = neg_hi; | ||
// Inst{13-11} = op_sel | ||
let Inst{11} = scale_src0_opsel{0}; | ||
let Inst{12} = scale_src1_opsel{0}; | ||
let Inst{11} = src0_modifiers{2}; | ||
let Inst{12} = src1_modifiers{2}; | ||
// Inst{13} = unused op_sel | ||
// Inst{14} = unused op_sel_hi2 | ||
|
||
|
@@ -542,8 +542,8 @@ class VOP3PXe <bits<7> op, VOPProfile MFMAPfl, bit acc_cd = 0> : Enc128, VOP3Pe_ | |
let Inst{49-41} = scale_src1; | ||
// Inst{50-58} = unused | ||
// Inst{60-59} = op_sel_hi; | ||
let Inst{59} = scale_src0_opsel{1}; | ||
let Inst{60} = scale_src1_opsel{1}; | ||
let Inst{59} = src0_modifiers{3}; | ||
let Inst{60} = src1_modifiers{3}; | ||
// Inst{63-61} = neg; | ||
|
||
// The high half of the encoding is the unscaled mfma op. | ||
|
@@ -1437,17 +1437,17 @@ class getVOP3MAIScaledPat<VOPProfile P, SDPatternOperator node> { | |
// mfma | ||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, | ||
timm:$cbsz, timm:$blgp, | ||
MFMALdScaleModifierOp:$scale_src0_opsel, | ||
MFMALdScaleModifierOp:$src0_modifiers, | ||
i32:$scale_src0, | ||
MFMALdScaleModifierOp:$scale_src1_opsel, | ||
MFMALdScaleModifierOp:$src1_modifiers, | ||
i32:$scale_src1 | ||
))], | ||
// smfmac | ||
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx, | ||
timm:$cbsz, timm:$abid, | ||
MFMALdScaleModifierOp:$scale_src0_opsel, | ||
MFMALdScaleModifierOp:$src0_modifiers, | ||
i32:$scale_src0, | ||
MFMALdScaleModifierOp:$scale_src1_opsel, | ||
MFMALdScaleModifierOp:$src1_modifiers, | ||
i32:$scale_src1))]); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.