Skip to content

Commit fab28e0

Browse files
committed
Reapply "[AMDGPU] Introduce real and keep fake True16 instructions."
Reverts 6cb3866. Analysis of failures on buildbots with expensive checks enabled showed that the problem was triggered by changes in another commit, 469b3bf, and was caused by the bug addressed in #67245.
1 parent f435f55 commit fab28e0

14 files changed

+144
-66
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,15 @@ def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">,
16931693
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
16941694
def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;
16951695

1696+
// Control use of True16 instructions. The real True16 instructions are
1697+
// True16 instructions as they are defined in the ISA. Fake True16
1698+
// instructions have the same encoding as real ones but syntactically
1699+
// only allow 32-bit registers in operands and use low halves thereof.
1700+
def UseRealTrue16Insts : Predicate<"Subtarget->useRealTrue16Insts()">,
1701+
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
1702+
def UseFakeTrue16Insts : Predicate<"Subtarget->hasTrue16BitInsts() && "
1703+
"!Subtarget->useRealTrue16Insts()">;
1704+
16961705
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
16971706
AssemblerPredicate<(all_of FeatureVOP3P)>;
16981707

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -420,11 +420,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
420420
// encodings
421421
if (isGFX11Plus() && Bytes.size() >= 12 ) {
422422
DecoderUInt128 DecW = eat12Bytes(Bytes);
423-
Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
423+
Res =
424+
tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
425+
MI, DecW, Address, CS);
424426
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
425427
break;
426428
MI = MCInst(); // clear
427-
Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
429+
Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
430+
MI, DecW, Address, CS);
428431
if (Res) {
429432
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
430433
convertVOP3PDPPInst(MI);
@@ -463,15 +466,17 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
463466
break;
464467
MI = MCInst(); // clear
465468

466-
Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
469+
Res = tryDecodeInst(DecoderTableDPP8GFX1164,
470+
DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
467471
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
468472
break;
469473
MI = MCInst(); // clear
470474

471475
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
472476
if (Res) break;
473477

474-
Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
478+
Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
479+
MI, QW, Address, CS);
475480
if (Res) {
476481
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
477482
convertVOPCDPPInst(MI);
@@ -532,7 +537,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
532537
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
533538
if (Res) break;
534539

535-
Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
540+
Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
541+
Address, CS);
536542
if (Res) break;
537543

538544
if (Bytes.size() < 4) break;
@@ -562,7 +568,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
562568
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
563569
if (Res) break;
564570

565-
Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
571+
Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
572+
Address, CS);
566573
if (Res)
567574
break;
568575

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,17 @@ class AMDGPUDisassembler : public MCDisassembler {
144144
return MCDisassembler::Fail;
145145
}
146146

147+
/// Attempts to decode \p Inst by consulting \p Table1 first and then
/// \p Table2, delegating each attempt to the single-table tryDecodeInst
/// overload with the same \p MI, \p Address and \p Comments.
/// \returns the first status that tests true in the loop below (presumably
/// any non-Fail status -- confirm against the DecodeStatus enum), or
/// MCDisassembler::Fail when neither table produces one.
template <typename InsnType>
148+
DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
149+
MCInst &MI, InsnType Inst, uint64_t Address,
150+
raw_ostream &Comments) const {
151+
// Order matters: Table1 is tried before Table2, and the first hit wins.
for (const uint8_t *T : {Table1, Table2}) {
152+
if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
153+
return Res;
154+
}
155+
// Neither table matched.
return MCDisassembler::Fail;
156+
}
157+
147158
std::optional<DecodeStatus>
148159
onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
149160
uint64_t Address, raw_ostream &CStream) const override;

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,6 +1408,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
14081408
case AMDGPU::V_MAX_F32_e64:
14091409
case AMDGPU::V_MAX_F16_e64:
14101410
case AMDGPU::V_MAX_F16_t16_e64:
1411+
case AMDGPU::V_MAX_F16_fake16_e64:
14111412
case AMDGPU::V_MAX_F64_e64:
14121413
case AMDGPU::V_PK_MAX_F16: {
14131414
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
@@ -1503,7 +1504,8 @@ static int getOModValue(unsigned Opc, int64_t Val) {
15031504
}
15041505
}
15051506
case AMDGPU::V_MUL_F16_e64:
1506-
case AMDGPU::V_MUL_F16_t16_e64: {
1507+
case AMDGPU::V_MUL_F16_t16_e64:
1508+
case AMDGPU::V_MUL_F16_fake16_e64: {
15071509
switch (static_cast<uint16_t>(Val)) {
15081510
case 0x3800: // 0.5
15091511
return SIOutMods::DIV2;
@@ -1530,12 +1532,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
15301532
case AMDGPU::V_MUL_F64_e64:
15311533
case AMDGPU::V_MUL_F32_e64:
15321534
case AMDGPU::V_MUL_F16_t16_e64:
1535+
case AMDGPU::V_MUL_F16_fake16_e64:
15331536
case AMDGPU::V_MUL_F16_e64: {
15341537
// If output denormals are enabled, omod is ignored.
15351538
if ((Op == AMDGPU::V_MUL_F32_e64 &&
15361539
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
15371540
((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
1538-
Op == AMDGPU::V_MUL_F16_t16_e64) &&
1541+
Op == AMDGPU::V_MUL_F16_t16_e64 ||
1542+
Op == AMDGPU::V_MUL_F16_fake16_e64) &&
15391543
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
15401544
return std::pair(nullptr, SIOutMods::NONE);
15411545

@@ -1565,12 +1569,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
15651569
case AMDGPU::V_ADD_F64_e64:
15661570
case AMDGPU::V_ADD_F32_e64:
15671571
case AMDGPU::V_ADD_F16_e64:
1568-
case AMDGPU::V_ADD_F16_t16_e64: {
1572+
case AMDGPU::V_ADD_F16_t16_e64:
1573+
case AMDGPU::V_ADD_F16_fake16_e64: {
15691574
// If output denormals are enabled, omod is ignored.
15701575
if ((Op == AMDGPU::V_ADD_F32_e64 &&
15711576
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
15721577
((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
1573-
Op == AMDGPU::V_ADD_F16_t16_e64) &&
1578+
Op == AMDGPU::V_ADD_F16_t16_e64 ||
1579+
Op == AMDGPU::V_ADD_F16_fake16_e64) &&
15741580
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
15751581
return std::pair(nullptr, SIOutMods::NONE);
15761582

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2262,6 +2262,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
22622262
field list<ValueType> ArgVT = _ArgVT;
22632263
field bit EnableClamp = _EnableClamp;
22642264
field bit IsTrue16 = 0;
2265+
field bit IsRealTrue16 = 0;
22652266

22662267
field ValueType DstVT = ArgVT[0];
22672268
field ValueType Src0VT = ArgVT[1];
@@ -2453,6 +2454,21 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
24532454
// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
24542455
// class, so copy changes to this class in those profiles
24552456
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
2457+
let IsTrue16 = 1;
2458+
let IsRealTrue16 = 1;
2459+
// Most DstVT are 16-bit, but not all.
2460+
let DstRC = getVALUDstForVT_t16<DstVT>.ret;
2461+
let DstRC64 = getVALUDstForVT<DstVT>.ret;
2462+
let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
2463+
let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
2464+
let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
2465+
let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
2466+
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
2467+
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
2468+
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
2469+
}
2470+
2471+
class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
24562472
let IsTrue16 = 1;
24572473
// Most DstVT are 16-bit, but not all
24582474
let DstRC = getVALUDstForVT_t16<DstVT>.ret;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,8 +1641,10 @@ def : ClampPat<V_MAX_F32_e64, f32>;
16411641
def : ClampPat<V_MAX_F64_e64, f64>;
16421642
let SubtargetPredicate = NotHasTrue16BitInsts in
16431643
def : ClampPat<V_MAX_F16_e64, f16>;
1644-
let SubtargetPredicate = HasTrue16BitInsts in
1644+
let SubtargetPredicate = UseRealTrue16Insts in
16451645
def : ClampPat<V_MAX_F16_t16_e64, f16>;
1646+
let SubtargetPredicate = UseFakeTrue16Insts in
1647+
def : ClampPat<V_MAX_F16_fake16_e64, f16>;
16461648

16471649
let SubtargetPredicate = HasVOP3PInsts in {
16481650
def : GCNPat <
@@ -2696,12 +2698,12 @@ def : GCNPat<
26962698
let OtherPredicates = [HasTrue16BitInsts] in {
26972699
def : GCNPat<
26982700
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
2699-
(V_MUL_F16_t16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
2701+
(V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
27002702
>;
27012703

27022704
def : GCNPat<
27032705
(fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
2704-
(V_MUL_F16_t16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
2706+
(V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
27052707
>;
27062708
} // End OtherPredicates
27072709

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ multiclass VOP1Inst_t16<string opName,
152152
defm NAME : VOP1Inst<opName, P, node>;
153153
}
154154
let OtherPredicates = [HasTrue16BitInsts] in {
155-
defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
155+
defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
156156
}
157157
}
158158

@@ -170,7 +170,7 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
170170
}
171171

172172
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
173-
VOPProfile_True16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
173+
VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
174174

175175
let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
176176
let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
@@ -199,7 +199,7 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
199199
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
200200
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
201201
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
202-
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
202+
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
203203
let HasOMod = 1;
204204
}
205205

@@ -292,13 +292,13 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
292292
let OtherPredicates = [NotHasTrue16BitInsts] in
293293
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
294294
let OtherPredicates = [HasTrue16BitInsts] in
295-
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
295+
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
296296
} // End FPDPRounding = 1, isReMaterializable = 0
297297

298298
let OtherPredicates = [NotHasTrue16BitInsts] in
299299
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
300300
let OtherPredicates = [HasTrue16BitInsts] in
301-
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
301+
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
302302

303303
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
304304
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,12 @@ multiclass VOP2Inst_t16<string opName,
194194
let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
195195
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
196196
}
197-
let SubtargetPredicate = HasTrue16BitInsts in {
197+
let SubtargetPredicate = UseRealTrue16Insts in {
198198
defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
199199
}
200+
let SubtargetPredicate = UseFakeTrue16Insts in {
201+
defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16", GFX9Renamed>;
202+
}
200203
}
201204

202205
// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
@@ -212,7 +215,7 @@ multiclass VOP2Inst_e64_t16<string opName,
212215
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
213216
}
214217
let SubtargetPredicate = HasTrue16BitInsts in {
215-
defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
218+
defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16", GFX9Renamed>;
216219
}
217220
}
218221

@@ -874,7 +877,7 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
874877
let HasSrc1FloatMods = 0;
875878
let Src1ModSDWA = Int16SDWAInputMods;
876879
}
877-
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
880+
def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
878881
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
879882
let Src1DPP = VGPR_32_Lo128;
880883
let Src1ModDPP = IntT16VRegInputMods;
@@ -925,9 +928,9 @@ def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
925928

926929
let SubtargetPredicate = isGFX11Plus in {
927930
let isCommutable = 1 in {
928-
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
929-
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
930-
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
931+
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
932+
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
933+
defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
931934
} // End isCommutable = 1
932935
} // End SubtargetPredicate = isGFX11Plus
933936

@@ -1307,6 +1310,8 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
13071310
multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
13081311
string asmName, bit single = 0> {
13091312
defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
1313+
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "GFX11", "GFX11_FAKE16"),
1314+
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in
13101315
def _e32_gfx11 :
13111316
VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>,
13121317
VOP2e<op{5-0}, ps.Pfl> {
@@ -1331,7 +1336,8 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
13311336
def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
13321337
SIEncodingFamily.GFX11> {
13331338
let AsmString = asmName # ps.Pfl.AsmDPP16;
1334-
let DecoderNamespace = "DPPGFX11";
1339+
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPPGFX11", "DPPGFX11_FAKE16");
1340+
let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only);
13351341
}
13361342
}
13371343
multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
@@ -1340,7 +1346,8 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
13401346
if ps.Pfl.HasExtDPP then
13411347
def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
13421348
let AsmString = asmName # ps.Pfl.AsmDPP8;
1343-
let DecoderNamespace = "DPP8GFX11";
1349+
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPP8GFX11", "DPP8GFX11_FAKE16");
1350+
let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only);
13441351
}
13451352
}
13461353

@@ -1491,13 +1498,19 @@ defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f,
14911498
defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;
14921499

14931500
defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
1501+
defm V_ADD_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
14941502
defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
1503+
defm V_SUB_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
14951504
defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
1505+
defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
14961506
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
1507+
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
14971508
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">;
14981509
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">;
14991510
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
1511+
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
15001512
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
1513+
defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
15011514
defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">;
15021515
defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;
15031516

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,6 +1358,8 @@ let AssemblerPredicate = isGFX11Only,
13581358
VOP3_Real<ps, SIEncodingFamily.GFX11>,
13591359
VOP3OpSel_gfx11<op, ps.Pfl>;
13601360
if !not(ps.Pfl.HasOpSel) then
1361+
let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "GFX11", "GFX11_FAKE16"),
1362+
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in
13611363
def _e64_gfx11 :
13621364
VOP3_Real<ps, SIEncodingFamily.GFX11>,
13631365
VOP3e_gfx11<op, ps.Pfl>;
@@ -1388,7 +1390,9 @@ let AssemblerPredicate = isGFX11Only,
13881390
multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName,
13891391
string asmName> {
13901392
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1391-
let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, DecoderNamespace = "DPPGFX11" in {
1393+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP16,
1394+
DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPPGFX11", "DPPGFX11_FAKE16"),
1395+
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in {
13921396
defm NAME : VOP3_Real_dpp_Base_gfx11<op, opName>;
13931397
}
13941398
}
@@ -1411,7 +1415,9 @@ let AssemblerPredicate = isGFX11Only,
14111415
multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName,
14121416
string asmName> {
14131417
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
1414-
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8GFX11" in {
1418+
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
1419+
DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPP8GFX11", "DPP8GFX11_FAKE16"),
1420+
AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in {
14151421
defm NAME : VOP3_Real_dpp8_Base_gfx11<op, opName>;
14161422
}
14171423
}

0 commit comments

Comments
 (0)