-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][MC][True16] Support VOP2 instructions with true16 format #115233
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -374,6 +374,12 @@ class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> { | |
} | ||
|
||
def VOP_MADAK_F16 : VOP_MADAK <f16>; | ||
def VOP_MADAK_F16_t16 : VOP_MADAK <f16> { | ||
let IsTrue16 = 1; | ||
let IsRealTrue16 = 1; | ||
let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret; | ||
let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm); | ||
} | ||
def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> { | ||
let IsTrue16 = 1; | ||
let DstRC = getVALUDstForVT_fake16<DstVT>.ret; | ||
|
@@ -399,6 +405,12 @@ class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> { | |
} | ||
|
||
def VOP_MADMK_F16 : VOP_MADMK <f16>; | ||
def VOP_MADMK_F16_t16 : VOP_MADMK <f16> { | ||
let IsTrue16 = 1; | ||
let IsRealTrue16 = 1; | ||
let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret; | ||
let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1); | ||
} | ||
def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> { | ||
let IsTrue16 = 1; | ||
let DstRC = getVALUDstForVT_fake16<DstVT>.ret; | ||
|
@@ -467,6 +479,42 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v | |
} | ||
|
||
def VOP_MAC_F16 : VOP_MAC <f16>; | ||
def VOP_MAC_F16_t16 : VOP_MAC <f16> { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a lot of copy-paste. Is there a way to avoid replicating so many lets? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't see a way to reduce it much. These lets are pretty much standard for a true16 instruction, but we have both the true16 lets and all the VOP_MAC lets to support. We could inherit from VOPProfile_TRUE16 instead of VOP_MAC, but then we'd have all the same lets here as in VOP_MAC. One upside is that when the default is switched to true16, we can delete 6 lines related to VOP3DPP. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks Joe for the explanation. Let me double-check whether some lines can be removed, but I think the majority of them will have to stay. |
||
let IsTrue16 = 1; | ||
let IsRealTrue16 = 1; | ||
let HasOpSel = 1; | ||
let DstRC = VOPDstOperand_t16Lo128; | ||
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2); | ||
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret; | ||
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret; | ||
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret; | ||
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, | ||
Src1ModDPP:$src1_modifiers, Src1DPP:$src1, | ||
getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument | ||
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, | ||
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); | ||
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, | ||
Src1ModDPP:$src1_modifiers, Src1DPP:$src1, | ||
getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument | ||
dpp8:$dpp8, Dpp8FI:$fi); | ||
let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue*/, 1/*IsVOP3Encoding*/>.ret; | ||
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret; | ||
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret; | ||
let Src0VOP3DPP = VGPRSrc_16; | ||
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret; | ||
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret; | ||
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret; | ||
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret; | ||
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret; | ||
let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; | ||
} | ||
def VOP_MAC_F16_fake16 : VOP_MAC <f16> { | ||
let IsTrue16 = 1; | ||
let DstRC = getVALUDstForVT_fake16<DstVT>.ret; | ||
|
@@ -998,6 +1046,9 @@ let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in { | |
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { | ||
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; | ||
} | ||
let True16Predicate = UseRealTrue16Insts in { | ||
def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">; | ||
} | ||
let True16Predicate = UseFakeTrue16Insts in { | ||
def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">; | ||
} | ||
|
@@ -1006,6 +1057,9 @@ let isCommutable = 1 in { | |
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { | ||
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; | ||
} | ||
let True16Predicate = UseRealTrue16Insts in { | ||
def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">; | ||
} | ||
let True16Predicate = UseFakeTrue16Insts in { | ||
def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">; | ||
} | ||
|
@@ -1020,6 +1074,9 @@ let SubtargetPredicate = isGFX10Plus in { | |
let True16Predicate = NotHasTrue16BitInsts in { | ||
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; | ||
} | ||
let True16Predicate = UseRealTrue16Insts in { | ||
defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>; | ||
} | ||
let True16Predicate = UseFakeTrue16Insts in { | ||
defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>; | ||
} | ||
|
@@ -1692,8 +1749,8 @@ multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, stri | |
VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>; | ||
|
||
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> { | ||
defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">; | ||
defm OpName#"_fake16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">; | ||
defm _t16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">; | ||
defm _fake16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">; | ||
} | ||
|
||
multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> : | ||
|
@@ -1712,7 +1769,14 @@ multiclass VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmN | |
|
||
multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName, | ||
string opName = NAME> : | ||
VOP2_Real_FULL_with_name_gfx11_gfx12<op, opName, asmName>; | ||
VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>, | ||
VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; | ||
|
||
multiclass VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmName, | ||
string opName = NAME> { | ||
defm _t16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_t16">; | ||
defm _fake16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_fake16">; | ||
} | ||
|
||
multiclass VOP2_Real_FULL_gfx11<bits<6> op> : | ||
VOP2_Real_FULL<GFX11Gen, op>; | ||
|
@@ -1747,15 +1811,15 @@ defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16 | |
defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">; | ||
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">; | ||
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">; | ||
defm V_FMAC_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">; | ||
defm V_FMAC_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x036, "v_fmac_f16">; | ||
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">; | ||
defm V_LDEXP_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">; | ||
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">; | ||
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">; | ||
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">; | ||
defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">; | ||
defm V_FMAMK_F16_fake16 : VOP2Only_Real_MADK_t16_gfx11_gfx12<0x037, "v_fmamk_f16">; | ||
defm V_FMAAK_F16_fake16 : VOP2Only_Real_MADK_t16_gfx11_gfx12<0x038, "v_fmaak_f16">; | ||
defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037, "v_fmamk_f16">; | ||
defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">; | ||
|
||
// VOP3 only. | ||
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like this can be combined with decodeOperand_VSrcT16_Lo128(), e.g., by adding a `Deferred` template parameter?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it can be combined, but it seems to be a naming convention in this file that a deferred decoder has "Deferred" in its function name. So it might be better to keep them separate?