[AMDGPU] Support v_lshl_add_u64 in gfx1250 (llvm#145591)

rampitec · web-flow · commit d06c2efd67e5 · 2025-06-24T15:49:01.000-07:00
This change also brings in the DPP changes needed to define the instruction.
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -661,6 +661,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
         break;
 
+      // FIXME: DecoderTableGFX125064 is not defined yet.
+      if (isGFX1250() &&
+          tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS))
+        break;
+
       if (isGFX12() &&
           tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                         Address, CS))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -32,6 +32,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
   let HasExtDPP = 0;
 }
 
+let HasExt64BitDPP = 1 in {
 def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
 def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
 
@@ -48,10 +49,13 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
   let HasExtDPP = 0;
 }
 
+def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;
+
 def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
   let HasExtVOP3DPP = 0;
   let HasExtDPP = 0;
 }
+} // End HasExt64BitDPP = 1;
 
 //===----------------------------------------------------------------------===//
 // VOP3 INTERP
@@ -722,7 +726,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
 // V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
 // src0 is shifted left by 0-4 (use “0” to get ADD_U64).
 let SubtargetPredicate = HasLshlAddU64Inst in
-defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
+defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", V_LSHL_ADD_U64_PROF>;
 
 let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
     SchedRW = [WriteFloatCvt] in {
@@ -1889,6 +1893,9 @@ let AssemblerPredicate = isGFX11Plus in {
   def : AMDGPUMnemonicAlias<"v_xor_add_u32", "v_xad_u32">;
 }
 
+// These instructions differ from GFX12 variant by supporting DPP:
+defm V_LSHL_ADD_U64                  : VOP3Only_Realtriple_gfx1250<0x252>;
+
 //===----------------------------------------------------------------------===//
 // GFX10.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1552,12 +1552,17 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
             ""));
 }
 
-multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
+                    list<Predicate> predicates = []> {
   def _e64 : VOP3InstBase<OpName, P, node>;
-  let SubtargetPredicate = isGFX11Plus in {
-    if P.HasExtVOP3DPP then
-      def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
-  } // end SubtargetPredicate = isGFX11Plus
+  if P.HasExtVOP3DPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <OpName, P> {
+      let SubtargetPredicate = isGFX11Plus;
+    }
+  else if P.HasExt64BitDPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <OpName, P> {
+      let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]);
+    }
 }
 
 class UniformUnaryFragOrOp<SDPatternOperator Op> {
@@ -1961,6 +1966,17 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
 multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
   VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
 
+multiclass VOP3Only_Realtriple_with_name_gfx12_not_gfx1250<bits<10> op, string opName,
+                                                           string asmName, string pseudo_mnemonic = "",
+                                                           bit isSingle = 0> :
+  VOP3_Realtriple_with_name<GFX12Not12_50Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
+  VOP3_Real_Base<GFX1250Gen, op, NAME, 1/*IsSingle*/>;
+
+multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
+  VOP3_Realtriple<GFX1250Gen, op, isSingle>;
+
 multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
                                      string pseudo_mnemonic = "", bit isSingle = 0> :
   VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1     ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1     ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX125X-ERR,GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}}                                          ^
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}}                                          ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+
+0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1     ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# GFX1250-FAKE16: {{.*}}
+# GFX1250-REAL16: {{.*}}