-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][True16][MC] true16 for v_mad_u/i32_u/i16 #124781
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
broxigarchen
merged 1 commit into
llvm:main
from
broxigarchen:main-merge-true16-vop3-mc-more-instructions-12
Jan 30, 2025
Merged
[AMDGPU][True16][MC] true16 for v_mad_u/i32_u/i16 #124781
broxigarchen
merged 1 commit into
llvm:main
from
broxigarchen:main-merge-true16-vop3-mc-more-instructions-12
Jan 30, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cfb45f7
to
e58bbde
Compare
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Brox Chen (broxigarchen) Changes: true16 asm/dasm support for v_mad_u32_u16 and v_mad_i32_i16. Patch is 205.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124781.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index c06c932a5375e8..ce73e0ca361d9b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -645,8 +645,8 @@ defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
let isCommutable = 1 in {
- defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
- defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
+ defm V_MAD_U32_U16 : VOP3Inst_t16 <"v_mad_u32_u16", VOP_I32_I16_I16_I32>;
+ defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
} // End isCommutable = 1
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
@@ -1736,8 +1736,8 @@ defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;
defm V_OR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x258>;
-defm V_MAD_U32_U16 : VOP3_Realtriple_gfx11_gfx12<0x259>;
-defm V_MAD_I32_I16 : VOP3_Realtriple_gfx11_gfx12<0x25a>;
+defm V_MAD_U32_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x259, "v_mad_u32_u16">;
+defm V_MAD_I32_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x25a, "v_mad_i32_i16">;
defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 40e3fbda47787a..e55fbfc6e18c8c 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -2906,11 +2906,11 @@ v_mad_i16 v5.l, -1, exec_hi, src_scc
v_mad_i16 v5.l, src_scc, vcc_lo, -1
// GFX11: v_mad_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03]
-v_mad_i32_i16 v5, v1, v2, v3
-// GFX11: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
+v_mad_i32_i16 v5, v1.l, v2.l, v3
+// GFX11: v_mad_i32_i16 v5, v1.l, v2.l, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
-v_mad_i32_i16 v5, v255, v255, s3
-// GFX11: v_mad_i32_i16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
+v_mad_i32_i16 v5, v255.l, v255.l, s3
+// GFX11: v_mad_i32_i16 v5, v255.l, v255.l, s3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
v_mad_i32_i16 v5, s1, s2, v255
// GFX11: v_mad_i32_i16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07]
@@ -2951,6 +2951,18 @@ v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
// GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+v_mad_i32_i16 v5, v1.h, v2.l, v3
+// GFX11: v_mad_i32_i16 v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x5a,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_i32_i16 v5, v255.l, v255.h, s3
+// GFX11: v_mad_i32_i16 v5, v255.l, v255.h, s3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x5a,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc
+// GFX11: v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x00,0x5a,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 clamp
+// GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 clamp ; encoding: [0xff,0x80,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
v_mad_i32_i24 v5, v1, v2, s3
// GFX11: v_mad_i32_i24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00]
@@ -3134,11 +3146,11 @@ v_mad_u16 v5.l, -1, exec_hi, src_scc
v_mad_u16 v5.l, src_scc, vcc_lo, -1
// GFX11: v_mad_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03]
-v_mad_u32_u16 v5, v1, v2, v3
-// GFX11: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
+v_mad_u32_u16 v5, v1.l, v2.l, v3
+// GFX11: v_mad_u32_u16 v5, v1.l, v2.l, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
-v_mad_u32_u16 v5, v255, v255, s3
-// GFX11: v_mad_u32_u16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
+v_mad_u32_u16 v5, v255.l, v255.l, s3
+// GFX11: v_mad_u32_u16 v5, v255.l, v255.l, s3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
v_mad_u32_u16 v5, s1, s2, v255
// GFX11: v_mad_u32_u16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07]
@@ -3179,6 +3191,18 @@ v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
// GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+v_mad_u32_u16 v5, v1.h, v2.l, v3
+// GFX11: v_mad_u32_u16 v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x59,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v255.l, v255.h, s3
+// GFX11: v_mad_u32_u16 v5, v255.l, v255.h, s3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x59,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc
+// GFX11: v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x00,0x59,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 clamp
+// GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 clamp ; encoding: [0xff,0x80,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
v_mad_u32_u24 v5, v1, v2, s3
// GFX11: v_mad_u32_u24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 2bff644605ff60..3a3f6c178cbde2 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -2000,47 +2000,68 @@ v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
-v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0]
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
-v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
-v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
-v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3]
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 row_mirror
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 row_shr:15
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -2153,47 +2174,68 @@ v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo
v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
-v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0]
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
-v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
-v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
-v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
-v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
-v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
-v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_m...
[truncated]
|
The CI error is not related to this change. |
kosarev
approved these changes
Jan 30, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
true16 asm/dasm support for v_mad_u32_u16 and v_mad_i32_i16