Skip to content

[AMDGPU][True16][MC] true16 for v_mad_u/i32_u/i16 #124781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

broxigarchen
Copy link
Contributor

true16 asm/dasm support for v_mad_u32_i16 and v_mad_i32_i16

@broxigarchen broxigarchen force-pushed the main-merge-true16-vop3-mc-more-instructions-12 branch from cfb45f7 to e58bbde Compare January 28, 2025 16:26
@broxigarchen broxigarchen marked this pull request as ready for review January 28, 2025 16:28
@llvmbot llvmbot added backend:AMDGPU mc Machine (object) code labels Jan 28, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 28, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Brox Chen (broxigarchen)

Changes

true16 asm/dasm support for v_mad_u32_i16 and v_mad_i32_i16


Patch is 205.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124781.diff

13 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+4-4)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3.s (+32-8)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s (+106-64)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s (+80-56)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+32-8)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+84-72)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+76-64)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt (+52-4)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt (+112-28)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt (+112-28)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+40-4)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+120-30)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+120-30)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index c06c932a5375e8..ce73e0ca361d9b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -645,8 +645,8 @@ defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
 defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
 
 let isCommutable = 1 in {
-  defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
-  defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
+  defm V_MAD_U32_U16 : VOP3Inst_t16 <"v_mad_u32_u16", VOP_I32_I16_I16_I32>;
+  defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
 } // End isCommutable = 1
 
 defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
@@ -1736,8 +1736,8 @@ defm V_ADD3_U32            : VOP3_Realtriple_gfx11_gfx12<0x255>;
 defm V_LSHL_OR_B32         : VOP3_Realtriple_gfx11_gfx12<0x256>;
 defm V_AND_OR_B32          : VOP3_Realtriple_gfx11_gfx12<0x257>;
 defm V_OR3_B32             : VOP3_Realtriple_gfx11_gfx12<0x258>;
-defm V_MAD_U32_U16         : VOP3_Realtriple_gfx11_gfx12<0x259>;
-defm V_MAD_I32_I16         : VOP3_Realtriple_gfx11_gfx12<0x25a>;
+defm V_MAD_U32_U16         : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x259, "v_mad_u32_u16">;
+defm V_MAD_I32_I16         : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x25a, "v_mad_i32_i16">;
 defm V_PERMLANE16_B32      : VOP3_Real_Base_gfx11_gfx12<0x25b>;
 defm V_PERMLANEX16_B32     : VOP3_Real_Base_gfx11_gfx12<0x25c>;
 defm V_MAXMIN_F32          : VOP3_Realtriple_gfx11<0x25e>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 40e3fbda47787a..e55fbfc6e18c8c 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -2906,11 +2906,11 @@ v_mad_i16 v5.l, -1, exec_hi, src_scc
 v_mad_i16 v5.l, src_scc, vcc_lo, -1
 // GFX11: v_mad_i16 v5.l, src_scc, vcc_lo, -1     ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03]
 
-v_mad_i32_i16 v5, v1, v2, v3
-// GFX11: v_mad_i32_i16 v5, v1, v2, v3            ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
+v_mad_i32_i16 v5, v1.l, v2.l, v3
+// GFX11: v_mad_i32_i16 v5, v1.l, v2.l, v3        ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
 
-v_mad_i32_i16 v5, v255, v255, s3
-// GFX11: v_mad_i32_i16 v5, v255, v255, s3        ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
+v_mad_i32_i16 v5, v255.l, v255.l, s3
+// GFX11: v_mad_i32_i16 v5, v255.l, v255.l, s3    ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
 
 v_mad_i32_i16 v5, s1, s2, v255
 // GFX11: v_mad_i32_i16 v5, s1, s2, v255          ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07]
@@ -2951,6 +2951,18 @@ v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
 v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
 // GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
 
+v_mad_i32_i16 v5, v1.h, v2.l, v3
+// GFX11: v_mad_i32_i16 v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x5a,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_i32_i16 v5, v255.l, v255.h, s3
+// GFX11: v_mad_i32_i16 v5, v255.l, v255.h, s3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x5a,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc
+// GFX11: v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x00,0x5a,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 clamp
+// GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 clamp ; encoding: [0xff,0x80,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
 v_mad_i32_i24 v5, v1, v2, s3
 // GFX11: v_mad_i32_i24 v5, v1, v2, s3            ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00]
 
@@ -3134,11 +3146,11 @@ v_mad_u16 v5.l, -1, exec_hi, src_scc
 v_mad_u16 v5.l, src_scc, vcc_lo, -1
 // GFX11: v_mad_u16 v5.l, src_scc, vcc_lo, -1     ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03]
 
-v_mad_u32_u16 v5, v1, v2, v3
-// GFX11: v_mad_u32_u16 v5, v1, v2, v3            ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
+v_mad_u32_u16 v5, v1.l, v2.l, v3
+// GFX11: v_mad_u32_u16 v5, v1.l, v2.l, v3        ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
 
-v_mad_u32_u16 v5, v255, v255, s3
-// GFX11: v_mad_u32_u16 v5, v255, v255, s3        ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
+v_mad_u32_u16 v5, v255.l, v255.l, s3
+// GFX11: v_mad_u32_u16 v5, v255.l, v255.l, s3    ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
 
 v_mad_u32_u16 v5, s1, s2, v255
 // GFX11: v_mad_u32_u16 v5, s1, s2, v255          ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07]
@@ -3179,6 +3191,18 @@ v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
 v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
 // GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
 
+v_mad_u32_u16 v5, v1.h, v2.l, v3
+// GFX11: v_mad_u32_u16 v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x59,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v255.l, v255.h, s3
+// GFX11: v_mad_u32_u16 v5, v255.l, v255.h, s3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x59,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc
+// GFX11: v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x00,0x59,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 clamp
+// GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 clamp ; encoding: [0xff,0x80,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
 v_mad_u32_u24 v5, v1, v2, s3
 // GFX11: v_mad_u32_u24 v5, v1, v2, s3            ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 2bff644605ff60..3a3f6c178cbde2 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -2000,47 +2000,68 @@ v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
 v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0]
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
 
-v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
 
-v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3]
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 row_mirror
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 row_shr:15
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
 
 v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
 // GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -2153,47 +2174,68 @@ v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
 v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
 
-v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0]
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
 
-v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
 
-v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
 
-v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
 
-v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
 
-v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
 
-v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_m...
[truncated]

@broxigarchen
Copy link
Contributor Author

CI error is not related

@broxigarchen broxigarchen merged commit 6b6c09e into llvm:main Jan 30, 2025
10 of 12 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants