llvm · rampitec · Feb 21, 2024 · Feb 20, 2024 · arsenm · Feb 25, 2024
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -323,6 +323,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
+  bool isPackedFP16InputMods() const { // v2f16 VS_32 reg/imm with input mods
+    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
+  }
 
   bool isVReg() const {
     return isRegClass(AMDGPU::VGPR_32RegClassID) ||

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1289,9 +1289,8 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
 
 class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
   let Name = "PackedFP"#opSize#"InputMods";
-  let ParserMethod = "parseRegOrImm";
-  let PredicateMethod = "isRegOrImm";
-//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
+  let ParserMethod = "parseRegOrImmWithFPInputMods"; // AsmParser parse hook
+  let PredicateMethod = "isPackedFP"#opSize#"InputMods"; // opSize 16 -> isPackedFP16InputMods
 }
 
 class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1305,7 +1304,7 @@ def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
 def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
 
 class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
-//  let PrintMethod = "printPackedFPInputMods";
+  let PrintMethod = "printOperandAndFPInputMods"; // InstPrinter method name
 }
 
 class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
@@ -1606,8 +1605,11 @@ class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
 }
 
 class getOpSelMod <ValueType VT> {
-  Operand ret = !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
-                    FP16InputMods, IntOpSelMods);
+  Operand ret = !cond(!eq(VT, f16) : FP16InputMods, // first true arm is used
+                      !eq(VT, bf16) : FP16InputMods,
+                      !eq(VT, v2f16) : PackedF16InputMods, // v2* -> packed mods
+                      !eq(VT, v2bf16) : PackedF16InputMods,
+                      1 : IntOpSelMods); // default arm for all other types
 }
 
 // Return type of input modifiers operand specified input operand for DPP

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -866,20 +866,9 @@ def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
 def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
 }
 
-class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
+class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> { // features fixed to VOP3_OPSEL
   let HasClamp = 0;
   let HasOMod = 0;
-  // Override modifiers for bf16(i16) (same as float modifiers).
-  let HasSrc0Mods = 1;
-  let HasSrc1Mods = 1;
-  let HasSrc2Mods = 1;
-  let Src0ModVOP3DPP = FPVRegInputMods;
-  let Src1ModVOP3DPP = FPVRegInputMods;
-  let Src2ModVOP3DPP = FP16InputMods;
-  let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
-                                     HasClamp, HasOMod, FP16InputMods,
-                                     FP16InputMods, FP16InputMods>.ret;
-  let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, HasOMod, 1, 1, 1>.ret;
 }
 
 let SubtargetPredicate = isGFX11Plus in {

diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -2116,6 +2116,12 @@ v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
 v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
 // GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
+v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
 v_dot2_f16_f16 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
 
@@ -2161,6 +2167,12 @@ v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
 v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
 // GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
+v_dot2_f16_f16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
 v_fma_dx9_zero_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
 

diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -1788,6 +1788,12 @@
 # GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
 
+# GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20
+
+# GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20
+
 # GFX11: v_dot2_f16_f16 v5, v1, v2, s3           ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00
 
@@ -1833,6 +1839,12 @@
 # GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
 
+# GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20
+
+# GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20
+
 # GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3       ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00