Skip to content

Commit 87e07e8

Browse files
committed
[AMDGPU][True16][MC]support 16bit operand and vdst in MC
1 parent c9ba6d3 commit 87e07e8

35 files changed

+1164
-750
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
280280
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281281
}
282282

283-
bool isRegOrImmWithIntT16InputMods() const {
284-
return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
283+
template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
284+
return isRegOrImmWithInputMods(
285+
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
285286
}
286287

287288
bool isRegOrImmWithInt32InputMods() const {
@@ -292,6 +293,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
292293
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293294
}
294295

296+
template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
297+
return isRegOrInline(
298+
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
299+
}
300+
295301
bool isRegOrInlineImmWithInt32InputMods() const {
296302
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297303
}
@@ -304,8 +310,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
304310
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305311
}
306312

307-
bool isRegOrImmWithFPT16InputMods() const {
308-
return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
313+
template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
314+
return isRegOrImmWithInputMods(
315+
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
309316
}
310317

311318
bool isRegOrImmWithFP32InputMods() const {
@@ -354,6 +361,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
354361
}
355362

356363
bool isVRegWithInputMods() const;
364+
template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
357365
template <bool IsFake16> bool isT16VRegWithInputMods() const;
358366

359367
bool isSDWAOperand(MVT type) const;
@@ -515,7 +523,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
515523
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
516524
}
517525

518-
bool isVCSrcTB16() const {
526+
bool isVCSrcT_b16() const {
519527
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
520528
}
521529

@@ -545,7 +553,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
545553
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
546554
}
547555

548-
bool isVCSrcTF16() const {
556+
bool isVCSrcT_f16() const {
557+
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
558+
}
559+
560+
bool isVCSrcT_bf16() const {
549561
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
550562
}
551563

@@ -583,7 +595,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
583595

584596
bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
585597

586-
bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
598+
bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
587599

588600
bool isVSrcT_b16_Lo128() const {
589601
return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
@@ -617,7 +629,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
617629

618630
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
619631

620-
bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
632+
bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
621633

622634
bool isVSrcT_bf16_Lo128() const {
623635
return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
@@ -2162,11 +2174,17 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
21622174
AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
21632175
}
21642176

2165-
template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2177+
template <bool IsFake16>
2178+
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
21662179
return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
21672180
: AMDGPU::VGPR_16_Lo128RegClassID);
21682181
}
21692182

2183+
template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2184+
return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2185+
: AMDGPU::VGPR_16RegClassID);
2186+
}
2187+
21702188
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
21712189
if (AsmParser->isVI())
21722190
return isVReg32();

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -328,36 +328,40 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
328328
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
329329
}
330330

331+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
332+
unsigned OperandSemantics>
331333
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
332334
uint64_t /*Addr*/,
333335
const MCDisassembler *Decoder) {
334336
assert(isUInt<9>(Imm) && "9-bit encoding expected");
335337

336338
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
337-
bool IsVGPR = Imm & (1 << 8);
338-
if (IsVGPR) {
339+
if (Imm & AMDGPU::EncValues::IS_VGPR) {
339340
bool IsHi = Imm & (1 << 7);
340341
unsigned RegIdx = Imm & 0x7f;
341342
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
342343
}
343-
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
344-
Imm & 0xFF, false, 16));
344+
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
345+
OpWidth, Imm & 0xFF, false, ImmWidth,
346+
(AMDGPU::OperandSemantics)OperandSemantics));
345347
}
346348

349+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
350+
unsigned OperandSemantics>
347351
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
348352
uint64_t /*Addr*/,
349353
const MCDisassembler *Decoder) {
350354
assert(isUInt<10>(Imm) && "10-bit encoding expected");
351355

352356
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
353-
bool IsVGPR = Imm & (1 << 8);
354-
if (IsVGPR) {
357+
if (Imm & AMDGPU::EncValues::IS_VGPR) {
355358
bool IsHi = Imm & (1 << 9);
356359
unsigned RegIdx = Imm & 0xff;
357360
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
358361
}
359-
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
360-
Imm & 0xFF, false, 16));
362+
return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
363+
OpWidth, Imm & 0xFF, false, ImmWidth,
364+
(AMDGPU::OperandSemantics)OperandSemantics));
361365
}
362366

363367
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
@@ -628,6 +632,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
628632
convertVOP3DPPInst(MI); // Regular VOP3 case
629633
}
630634

635+
convertTrue16OpSel(MI);
636+
631637
if (AMDGPU::isMAC(MI.getOpcode())) {
632638
// Insert dummy unused src2_modifiers.
633639
insertNamedMCOperand(MI, MCOperand::createImm(0),

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5424,9 +5424,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54245424
case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
54255425
case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
54265426
case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
5427-
case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5428-
case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5429-
case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
5427+
case AMDGPU::S_CVT_F32_F16:
5428+
case AMDGPU::S_CVT_HI_F32_F16:
5429+
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5430+
: AMDGPU::V_CVT_F32_F16_fake16_e64;
5431+
case AMDGPU::S_CVT_F16_F32:
5432+
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5433+
: AMDGPU::V_CVT_F16_F32_fake16_e64;
54305434
case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
54315435
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
54325436
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;

0 commit comments

Comments
 (0)