Skip to content

Commit 35e27c0

Browse files
authored
[AMDGPU][True16][MC] 16bit vsrc and vdst support in MC (#104510)
This is a large patch that includes the MC-level support for V_CVT_F16_F32, V_CVT_F32_F16 and V_LDEXP_F16 in true16 format. It includes the asm/disasm changes to encode/decode the 16-bit vsrc, vdst and src modifiers for the VOP and DPP formats. This patch is a dependency for many 16-bit instructions, but only three instructions are updated here to make it easier to review. There will be another patch to support these three instructions at the codegen level; this patch just replaces these two instructions with their fake16 format.
1 parent 050f785 commit 35e27c0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1576
-891
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
280280
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281281
}
282282

283-
bool isRegOrImmWithIntT16InputMods() const {
284-
return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
283+
template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
284+
return isRegOrImmWithInputMods(
285+
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
285286
}
286287

287288
bool isRegOrImmWithInt32InputMods() const {
@@ -292,6 +293,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
292293
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293294
}
294295

296+
template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
297+
return isRegOrInline(
298+
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
299+
}
300+
295301
bool isRegOrInlineImmWithInt32InputMods() const {
296302
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297303
}
@@ -304,8 +310,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
304310
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305311
}
306312

307-
bool isRegOrImmWithFPT16InputMods() const {
308-
return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
313+
template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
314+
return isRegOrImmWithInputMods(
315+
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
309316
}
310317

311318
bool isRegOrImmWithFP32InputMods() const {
@@ -354,6 +361,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
354361
}
355362

356363
bool isVRegWithInputMods() const;
364+
template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
357365
template <bool IsFake16> bool isT16VRegWithInputMods() const;
358366

359367
bool isSDWAOperand(MVT type) const;
@@ -515,7 +523,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
515523
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
516524
}
517525

518-
bool isVCSrcTB16() const {
526+
bool isVCSrcT_b16() const {
519527
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
520528
}
521529

@@ -545,7 +553,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
545553
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
546554
}
547555

548-
bool isVCSrcTF16() const {
556+
bool isVCSrcT_f16() const {
557+
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
558+
}
559+
560+
bool isVCSrcT_bf16() const {
549561
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
550562
}
551563

@@ -583,7 +595,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
583595

584596
bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
585597

586-
bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
598+
bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
587599

588600
bool isVSrcT_b16_Lo128() const {
589601
return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
@@ -617,7 +629,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
617629

618630
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
619631

620-
bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
632+
bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
621633

622634
bool isVSrcT_bf16_Lo128() const {
623635
return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
@@ -2162,11 +2174,17 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
21622174
AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
21632175
}
21642176

2165-
template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2177+
template <bool IsFake16>
2178+
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
21662179
return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
21672180
: AMDGPU::VGPR_16_Lo128RegClassID);
21682181
}
21692182

2183+
template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2184+
return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2185+
: AMDGPU::VGPR_16RegClassID);
2186+
}
2187+
21702188
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
21712189
if (AsmParser->isVI())
21722190
return isVReg32();

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -328,36 +328,40 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
328328
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
329329
}
330330

331+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
332+
unsigned OperandSemantics>
331333
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
332334
uint64_t /*Addr*/,
333335
const MCDisassembler *Decoder) {
334336
assert(isUInt<9>(Imm) && "9-bit encoding expected");
335337

336338
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
337-
bool IsVGPR = Imm & (1 << 8);
338-
if (IsVGPR) {
339+
if (Imm & AMDGPU::EncValues::IS_VGPR) {
339340
bool IsHi = Imm & (1 << 7);
340341
unsigned RegIdx = Imm & 0x7f;
341342
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
342343
}
343-
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
344-
Imm & 0xFF, false, 16));
344+
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
345+
OpWidth, Imm & 0xFF, false, ImmWidth,
346+
(AMDGPU::OperandSemantics)OperandSemantics));
345347
}
346348

349+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
350+
unsigned OperandSemantics>
347351
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
348352
uint64_t /*Addr*/,
349353
const MCDisassembler *Decoder) {
350354
assert(isUInt<10>(Imm) && "10-bit encoding expected");
351355

352356
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
353-
bool IsVGPR = Imm & (1 << 8);
354-
if (IsVGPR) {
357+
if (Imm & AMDGPU::EncValues::IS_VGPR) {
355358
bool IsHi = Imm & (1 << 9);
356359
unsigned RegIdx = Imm & 0xff;
357360
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
358361
}
359-
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
360-
Imm & 0xFF, false, 16));
362+
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
363+
OpWidth, Imm & 0xFF, false, ImmWidth,
364+
(AMDGPU::OperandSemantics)OperandSemantics));
361365
}
362366

363367
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
@@ -628,6 +632,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
628632
convertVOP3DPPInst(MI); // Regular VOP3 case
629633
}
630634

635+
convertTrue16OpSel(MI);
636+
631637
if (AMDGPU::isMAC(MI.getOpcode())) {
632638
// Insert dummy unused src2_modifiers.
633639
insertNamedMCOperand(MI, MCOperand::createImm(0),

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5556,9 +5556,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
55565556
case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
55575557
case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
55585558
case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
5559-
case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5560-
case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5561-
case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
5559+
case AMDGPU::S_CVT_F32_F16:
5560+
case AMDGPU::S_CVT_HI_F32_F16:
5561+
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5562+
: AMDGPU::V_CVT_F32_F16_fake16_e64;
5563+
case AMDGPU::S_CVT_F16_F32:
5564+
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5565+
: AMDGPU::V_CVT_F16_F32_fake16_e64;
55625566
case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
55635567
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
55645568
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;

0 commit comments

Comments
 (0)