Skip to content

[AMDGPU] Automate creation of byte_sel dags. NFCI. #140155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,7 @@ def WaitVMVSrc : NamedIntOperand<"wait_vm_vsrc"> {
// Named integer operand printed/parsed as "byte_sel". Its value is checked
// with the isUInt<2> validator.
def ByteSel : NamedIntOperand<"byte_sel"> {
let Validator = "isUInt<2>";
}
// ByteSel wrapped in DefaultOperand with a default value of 0.
def ByteSel0 : DefaultOperand<ByteSel, 0>;

let PrintMethod = "printBitOp3" in
def BitOp3 : NamedIntOperand<"bitop3">;
Expand Down Expand Up @@ -1971,7 +1972,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs>
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod,
bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> {
dag src0 = !if(!ge(NumSrcArgs, 1),
!if (HasModifiers,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0),
Expand All @@ -1987,20 +1989,29 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins Src2Mod:$src2_modifiers, Src2RC:$src2),
(ins Src2RC:$src2)),
(ins));
dag clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins));
// If there is vdst_in after clamp with HasFP8DstByteSel we cannot use
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can change the placement of the byte_sel operands, but that is a big disruption. If needed it will require a separate change.

// Clamp0 with its default value, because all default operands must be at the end.
dag clamp = !if(HasClamp, !if(HasFP8DstByteSel, (ins Clamp:$clamp),
(ins Clamp0:$clamp)),
(ins));
dag omod = !if(HasOMod, (ins omod0:$omod), (ins));
dag bytesel = !if(HasFP8ByteSel,
!con(!if(HasFP8DstByteSel, (ins VGPR_32:$vdst_in), (ins)),
(ins ByteSel0:$byte_sel)),
(ins));

dag ret = !con(src0, src1, src2, clamp, omod);
dag ret = !con(src0, src1, src2, clamp, omod, bytesel);
}

class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> {
// getIns64 handles clamp and omod. implicit mutex between vop3p and omod
dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
Src0Mod, Src1Mod, Src2Mod, HasFP8ByteSel, HasFP8DstByteSel>.ret;
dag opsel = (ins op_sel0:$op_sel);
dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
}
Expand Down Expand Up @@ -2612,7 +2623,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
HasOMod, Src0Mod, Src1Mod, Src2Mod,
HasFP8ByteSel, HasFP8DstByteSel>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
Expand All @@ -2630,7 +2642,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel,
HasFP8ByteSel, HasFP8DstByteSel>.ret;
defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
Expand Down
11 changes: 0 additions & 11 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -674,17 +674,6 @@ class VOPProfile_Base_CVT_F_F8_ByteSel<ValueType DstVT> : VOPProfile<[DstVT, i32
let HasClamp = 0;
let HasOMod = 0;
let HasModifiers = 0;

defvar bytesel = (ins ByteSel:$byte_sel);
let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
bytesel);
let InsVOP3Base = !con(getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP,
NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP,
Src2ModVOP3DPP, HasOpSel>.ret,
bytesel);
}

let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
Expand Down
11 changes: 1 addition & 10 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
let HasExtVOP3DPP = 1;
let HasOpSel = 1;
let HasFP8DstByteSel = 1;
let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have byte_sel operand.
let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
getAsmVOP3OpSel<3, HasClamp, HasOMod,
HasSrc0FloatMods, HasSrc1FloatMods,
Expand All @@ -607,16 +608,6 @@ class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
VOP3_Profile<VOPProfile<[i32, SrcVT, i32, untyped]>> {
let HasFP8DstByteSel = 1;
let HasClamp = 0;
defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel);
let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
bytesel);
let InsVOP3Base = !con(
getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret,
bytesel);
}

def IsPow2Plus1: PatLeaf<(i32 imm), [{
Expand Down