Skip to content

[AMDGPU] Fix v_dot2_f16_f16/v_dot2_bf16_bf16 operands #82423

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
}

// Operand predicate that forwards to isRegOrImmWithInputMods, checking this
// operand against the VS_32 register class with the packed v2f16 value type.
// The method name matches the PredicateMethod string that
// PackedFPInputModsMatchClass builds in SIInstrInfo.td
// ("isPackedFP" # opSize # "InputMods") when opSize is 16.
bool isPackedFP16InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
}

bool isVReg() const {
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
Expand Down
14 changes: 8 additions & 6 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1289,9 +1289,8 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {

class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "PackedFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImm";
let PredicateMethod = "isRegOrImm";
// let PredicateMethod = "isPackedFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
Expand All @@ -1305,7 +1304,7 @@ def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
// let PrintMethod = "printPackedFPInputMods";
let PrintMethod = "printOperandAndFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
Expand Down Expand Up @@ -1606,8 +1605,11 @@ class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
}

class getOpSelMod <ValueType VT> {
Operand ret = !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
FP16InputMods, IntOpSelMods);
Operand ret = !cond(!eq(VT, f16) : FP16InputMods,
!eq(VT, bf16) : FP16InputMods,
!eq(VT, v2f16) : PackedF16InputMods,
!eq(VT, v2bf16) : PackedF16InputMods,
1 : IntOpSelMods);
}

// Return type of input modifiers operand specified input operand for DPP
Expand Down
13 changes: 1 addition & 12 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -866,20 +866,9 @@ def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
}

class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
let HasClamp = 0;
let HasOMod = 0;
// Override modifiers for bf16(i16) (same as float modifiers).
let HasSrc0Mods = 1;
let HasSrc1Mods = 1;
let HasSrc2Mods = 1;
let Src0ModVOP3DPP = FPVRegInputMods;
let Src1ModVOP3DPP = FPVRegInputMods;
let Src2ModVOP3DPP = FP16InputMods;
let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasClamp, HasOMod, FP16InputMods,
FP16InputMods, FP16InputMods>.ret;
let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, HasOMod, 1, 1, 1>.ret;
}

let SubtargetPredicate = isGFX11Plus in {
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
Original file line number Diff line number Diff line change
Expand Up @@ -2116,6 +2116,12 @@ v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]

v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]

v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need tests with op_sel + a constant?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is nothing in the MC which ties op_sel/op_sel_hi to the fact that an operand is a literal, so the answer is probably no. But then, I do not see any tests with op_sel_hi at all.

// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]

v_dot2_f16_f16 v5, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]

Expand Down Expand Up @@ -2161,6 +2167,12 @@ v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]

v_dot2_f16_f16 v2, v0, 0x20004000, v2
// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]

v_dot2_f16_f16 v2, 0x20004000, v0, v2
// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]

v_fma_dx9_zero_f32 v5, v1, v2, s3
// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]

Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1788,6 +1788,12 @@
# GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00

# GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20

# GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20

# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00

Expand Down Expand Up @@ -1833,6 +1839,12 @@
# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00

# GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20

# GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20

# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00

Expand Down