Skip to content

[AMDGPU][MC] Enables sgpr or imm src1 for float VOP3 DPP, but excludi… #87382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4627,10 +4627,15 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
if (Src1Idx >= 0) {
const MCOperand &Src1 = Inst.getOperand(Src1Idx);
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
if (Src1.isImm() ||
(Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
Error(Op.getStartLoc(), "invalid operand for instruction");
if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
SMLoc S = getRegLoc(Reg, Operands);
Error(S, "invalid operand for instruction");
return false;
}
if (Src1.isImm()) {
Error(getInstLoc(Operands),
"src1 immediate operand invalid for instruction");
return false;
}
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2268,7 +2268,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
Expand Down
19 changes: 15 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
dpp8:$dpp8, Dpp8FI:$fi);
let Src2Mod = FP32InputMods; // dummy unused modifiers
let Src2RC64 = VGPRSrc_32; // stub argument
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
}
def VOP_MAC_F32 : VOP_MAC <f32>;
let HasExtDPP = 0, HasExt32BitDPP = 0 in
Expand Down Expand Up @@ -618,7 +619,7 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";

let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC64:$vdst);

// Suppress src2 implied by type since the 32-bit encoding uses an
// implicit VCC use.
Expand Down Expand Up @@ -652,7 +653,7 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
dpp8:$dpp8, Dpp8FI:$fi);

let Src0ModVOP3DPP = FPVRegInputMods;
let Src1ModVOP3DPP = FPVRegInputMods;
let Src1ModVOP3DPP = FP32VCSrcInputMods;

let HasExt = 1;
let HasExtDPP = 1;
Expand All @@ -662,7 +663,17 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
}

def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
// VOP2e_SGPR profile with value types [i16, i16, i16, i1]; this is the profile
// handed to the V_CNDMASK_B16 defm.
def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
let IsTrue16 = 1;
// DstRC64 is the register class VOP2e_SGPR places in Outs64; derive it from
// the destination value type.
let DstRC64 = getVALUDstForVT<DstVT>.ret;

// Source-modifier operands are looked up with f16 even though the profile's
// value types are i16.
let Src0Mod = getSrcMod<f16>.ret;
let Src1Mod = getSrcMod<f16>.ret;

// VOP3 DPP operands: src0 is fixed to VGPRSrc_32, src1 is derived from its
// value type, and the src1 modifier operand uses the f16 lookup with the
// IsFake16 argument set.
let Src0VOP3DPP = VGPRSrc_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret;
}

def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
let Outs32 = (outs SReg_32:$vdst);
Expand Down Expand Up @@ -703,7 +714,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
let HasDst32 = 0;
// VOPC disallows dst_sel and dst_unused as they have no effect on destination
let EmitDstSel = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
let Outs64 = (outs VOPDstS64orS32:$sdst);
let OutsVOP3DPP = Outs64;
let OutsVOP3DPP8 = Outs64;
Expand Down Expand Up @@ -112,6 +114,8 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
"$src0, $src1");
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
}

multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> {
Expand Down Expand Up @@ -785,6 +789,8 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
}

multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
Expand Down Expand Up @@ -812,6 +818,8 @@ class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, Va
let AsmVOP3Base = "$src0_modifiers, $src1";
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
// FIXME: work around AsmParser bug
let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
}

multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1150_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,13 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]

v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]

v_add_f32_e64_dpp v5, v1, s2 row_mirror
// GFX1150: encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]

v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf
// GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff]

// Regression test: guards the existing DPP encoding of v_cmp_le_f32 against future changes.
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
8 changes: 4 additions & 4 deletions llvm/test/MC/AMDGPU/gfx11_asm_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ v_add3_u32_e64_dpp v5, v1, s1, v0 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction

v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction

v_cvt_f32_i32_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
Expand Down Expand Up @@ -135,7 +135,7 @@ v_fmac_f16_e64_dpp v5, s2, v3 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_fmac_f16_e64_dpp v5, v2, 1.0 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction

v_fmac_f32_e64_dpp v5, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
Expand All @@ -144,7 +144,7 @@ v_fmac_f32_e64_dpp v5, 0x1234, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_fmac_f32_e64_dpp v5, v2, 1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction

v_fmac_f32_e64_dpp v5, -1.0, v3 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_features.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]

// Regression test: guards the existing DPP encoding of v_cmp_le_f32 against future changes.
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]

//
// Elements of CPol operand can be given in any order
//
Expand Down
Loading