Skip to content

[AMDGPU] Split Dpp8FI and Dpp16FI operands #82379

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 13 additions & 20 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
uint64_t /*Addr*/, \
Expand Down Expand Up @@ -440,19 +446,6 @@ static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
return DecoderUInt128(Lo, Hi);
}

// The disassembler is greedy, so we need to check FI operand value to
// not parse a dpp if the correct literal is not set. For dpp16 the
// autogenerated decoder checks the dpp literal
static bool isValidDPP8(const MCInst &MI) {
using namespace llvm::AMDGPU::DPP;
int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
assert(FiIdx != -1);
if ((unsigned)FiIdx >= MI.getNumOperands())
return false;
unsigned Fi = MI.getOperand(FiIdx).getImm();
return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
Expand All @@ -474,13 +467,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now convertDPP8Inst always returns success. I have patches which will clean this up some more.

break;
MI = MCInst(); // clear
Res =
tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear

const auto convertVOPDPP = [&]() {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {
Expand Down Expand Up @@ -530,26 +521,22 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
if (convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
}
}

Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear

Res = tryDecodeInst(DecoderTableDPP8GFX1164,
DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear

Res = tryDecodeInst(DecoderTableDPP8GFX1264,
DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear

Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;
Expand Down Expand Up @@ -982,7 +969,7 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
AMDGPU::OpName::src1_modifiers);
}
}
return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
Expand Down Expand Up @@ -1831,6 +1818,12 @@ MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
return MCOperand();
return MCOperand::createImm(Val);
}

bool AMDGPUDisassembler::isVI() const {
return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ class AMDGPUDisassembler : public MCDisassembler {

MCOperand decodeBoolReg(unsigned Val) const;
MCOperand decodeSplitBarrier(unsigned Val) const;
MCOperand decodeDpp8FI(unsigned Val) const;

int getTTmpIdx(unsigned Val) const;

Expand Down
19 changes: 11 additions & 8 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -987,8 +987,8 @@ def SDWAVopcDst : BoolRC {
}

class NamedIntOperand<ValueType Type, string Prefix, bit Optional = 1,
string ConvertMethod = "nullptr">
: CustomOperand<Type, Optional, NAME> {
string name = NAME, string ConvertMethod = "nullptr">
: CustomOperand<Type, Optional, name> {
let ParserMethod =
"[this](OperandVector &Operands) -> ParseStatus { "#
"return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
Expand Down Expand Up @@ -1090,9 +1090,12 @@ let DefaultValue = "0xf" in {
def DppRowMask : NamedIntOperand<i32, "row_mask">;
def DppBankMask : NamedIntOperand<i32, "bank_mask">;
}
def DppBoundCtrl : NamedIntOperand<i1, "bound_ctrl", 1,
def DppBoundCtrl : NamedIntOperand<i1, "bound_ctrl", 1, "DppBoundCtrl",
"[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
def DppFI : NamedIntOperand<i32, "fi">;

let DecoderMethod = "decodeDpp8FI" in
def Dpp8FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
def Dpp16FI : NamedIntOperand<i32, "fi", 1, "DppFI">;

def blgp : CustomOperand<i32, 1, "BLGP">;
def CBSZ : NamedIntOperand<i32, "cbsz">;
Expand Down Expand Up @@ -1821,15 +1824,15 @@ class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperan
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
(ins DppFI:$fi));
(ins Dpp16FI:$fi));
}

class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
(ins dpp8:$dpp8, DppFI:$fi));
(ins dpp8:$dpp8, Dpp8FI:$fi));
}

class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
Expand All @@ -1849,12 +1852,12 @@ class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit Has

class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
(ins DppFI:$fi));
(ins Dpp16FI:$fi));
}

class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
(ins dpp8:$dpp8, DppFI:$fi));
(ins dpp8:$dpp8, Dpp8FI:$fi));
}

// Ins for SDWA
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -379,9 +379,9 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let OutsDPP = (outs Src0RC32:$vdst);
let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, DppFI:$fi);
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, DppFI:$fi);
let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, Dpp8FI:$fi);
let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

let OutsVOP3DPP = (outs Src0RC64:$vdst);
Expand Down
18 changes: 9 additions & 9 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasModifiers, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
Expand All @@ -447,7 +447,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
dpp8:$dpp8, DppFI:$fi);
dpp8:$dpp8, Dpp8FI:$fi);
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
Expand Down Expand Up @@ -500,7 +500,7 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
dpp8:$dpp8, DppFI:$fi);
dpp8:$dpp8, Dpp8FI:$fi);
let Src2Mod = FP32InputMods; // dummy unused modifiers
let Src2RC64 = VGPRSrc_32; // stub argument
}
Expand Down Expand Up @@ -552,11 +552,11 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/
Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsDPP8 = (ins DstRCDPP:$old,
Src0DPP:$src0,
Src1DPP:$src1,
dpp8:$dpp8, DppFI:$fi);
dpp8:$dpp8, Dpp8FI:$fi);
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let OutsVOP3DPP = Outs64;
Expand Down Expand Up @@ -594,11 +594,11 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1>
Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsDPP8 = (ins DstRCDPP:$old,
Src0DPP:$src0,
Src1DPP:$src1,
dpp8:$dpp8, DppFI:$fi);
dpp8:$dpp8, Dpp8FI:$fi);

let HasExt = 1;
let HasExtDPP = 1;
Expand Down Expand Up @@ -645,11 +645,11 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsDPP8 = (ins DstRCDPP:$old,
FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
dpp8:$dpp8, DppFI:$fi);
dpp8:$dpp8, Dpp8FI:$fi);

let Src0ModVOP3DPP = FPVRegInputMods;
let Src1ModVOP3DPP = FPVRegInputMods;
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -532,11 +532,11 @@ def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
VGPR_32:$vdst_in, op_sel0:$op_sel,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, DppFI:$fi);
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
let InsVOP3DPP8 = (ins VGPR_32:$old,
FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, DppFI:$fi);
VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi);

let HasClamp = 0;
let HasExtVOP3DPP = 1;
Expand All @@ -553,12 +553,12 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
op_sel0:$op_sel, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, DppFI:$fi);
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
let InsVOP3DPP8 = (ins VGPR_32:$old,
FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
op_sel0:$op_sel, dpp8:$dpp8, DppFI:$fi);
op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi);
let HasClamp = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 1;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -461,13 +461,13 @@ def VOP3P_DOTF8_Profile : VOP3P_Profile<VOPProfile <[f32, i32, i32, f32]>,

let InsVOP3DPP8 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp8:$dpp8, DppFI:$fi);
neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp8:$dpp8, Dpp8FI:$fi);

let InsVOP3DPP16 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp_ctrl:$dpp_ctrl,
DppRowMask:$row_mask, DppBankMask:$bank_mask,
DppBoundCtrl:$bound_ctrl, DppFI:$fi);
DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
}

multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP16 = AsmDPP#"$fi";
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
// DPP8 forbids modifiers and can inherit from VOPC_Profile

let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
Expand Down