Skip to content

Commit 98db8d0

Browse files
authored
[AMDGPU] Fix v_dot2_f16_f16/v_dot2_bf16_bf16 operands (#82423)
src0 and src1 are packed f16/bf16 operands. We print literals such as 0x40002000 for them, but we cannot parse such literals back.
1 parent ff4d6c6 commit 98db8d0

File tree

5 files changed

+36
-18
lines changed

5 files changed

+36
-18
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
323323
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
324324
}
325325

326+
bool isPackedFP16InputMods() const {
327+
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
328+
}
326329

327330
bool isVReg() const {
328331
return isRegClass(AMDGPU::VGPR_32RegClassID) ||

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,9 +1289,8 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
12891289

12901290
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
12911291
let Name = "PackedFP"#opSize#"InputMods";
1292-
let ParserMethod = "parseRegOrImm";
1293-
let PredicateMethod = "isRegOrImm";
1294-
// let PredicateMethod = "isPackedFP"#opSize#"InputMods";
1292+
let ParserMethod = "parseRegOrImmWithFPInputMods";
1293+
let PredicateMethod = "isPackedFP"#opSize#"InputMods";
12951294
}
12961295

12971296
class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1305,7 +1304,7 @@ def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
13051304
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
13061305

13071306
class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
1308-
// let PrintMethod = "printPackedFPInputMods";
1307+
let PrintMethod = "printOperandAndFPInputMods";
13091308
}
13101309

13111310
class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
@@ -1606,8 +1605,11 @@ class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
16061605
}
16071606

16081607
class getOpSelMod <ValueType VT> {
1609-
Operand ret = !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1610-
FP16InputMods, IntOpSelMods);
1608+
Operand ret = !cond(!eq(VT, f16) : FP16InputMods,
1609+
!eq(VT, bf16) : FP16InputMods,
1610+
!eq(VT, v2f16) : PackedF16InputMods,
1611+
!eq(VT, v2bf16) : PackedF16InputMods,
1612+
1 : IntOpSelMods);
16111613
}
16121614

16131615
// Return type of input modifiers operand specified input operand for DPP

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -868,20 +868,9 @@ def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
868868
def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
869869
}
870870

871-
class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
871+
class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
872872
let HasClamp = 0;
873873
let HasOMod = 0;
874-
// Override modifiers for bf16(i16) (same as float modifiers).
875-
let HasSrc0Mods = 1;
876-
let HasSrc1Mods = 1;
877-
let HasSrc2Mods = 1;
878-
let Src0ModVOP3DPP = FPVRegInputMods;
879-
let Src1ModVOP3DPP = FPVRegInputMods;
880-
let Src2ModVOP3DPP = FP16InputMods;
881-
let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
882-
HasClamp, HasOMod, FP16InputMods,
883-
FP16InputMods, FP16InputMods>.ret;
884-
let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, HasOMod, 1, 1, 1>.ret;
885874
}
886875

887876
let SubtargetPredicate = isGFX11Plus in {

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2116,6 +2116,12 @@ v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
21162116
v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
21172117
// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
21182118

2119+
v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
2120+
// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
2121+
2122+
v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
2123+
// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
2124+
21192125
v_dot2_f16_f16 v5, v1, v2, s3
21202126
// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
21212127

@@ -2161,6 +2167,12 @@ v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
21612167
v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
21622168
// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
21632169

2170+
v_dot2_f16_f16 v2, v0, 0x20004000, v2
2171+
// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
2172+
2173+
v_dot2_f16_f16 v2, 0x20004000, v0, v2
2174+
// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
2175+
21642176
v_fma_dx9_zero_f32 v5, v1, v2, s3
21652177
// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
21662178

llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1788,6 +1788,12 @@
17881788
# GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
17891789
0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
17901790

1791+
# GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
1792+
0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20
1793+
1794+
# GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
1795+
0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20
1796+
17911797
# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
17921798
0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00
17931799

@@ -1833,6 +1839,12 @@
18331839
# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
18341840
0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
18351841

1842+
# GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
1843+
0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20
1844+
1845+
# GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
1846+
0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20
1847+
18361848
# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
18371849
0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00
18381850

0 commit comments

Comments
 (0)