Skip to content

Commit e58bbde

Browse files
committed
true16 for v_mad_u32_u16 / v_mad_i32_i16
1 parent 7d172f9 commit e58bbde

13 files changed

+970
-400
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -645,8 +645,8 @@ defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
645645
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
646646

647647
let isCommutable = 1 in {
648-
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
649-
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
648+
defm V_MAD_U32_U16 : VOP3Inst_t16 <"v_mad_u32_u16", VOP_I32_I16_I16_I32>;
649+
defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
650650
} // End isCommutable = 1
651651

652652
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
@@ -1736,8 +1736,8 @@ defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
17361736
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
17371737
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;
17381738
defm V_OR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x258>;
1739-
defm V_MAD_U32_U16 : VOP3_Realtriple_gfx11_gfx12<0x259>;
1740-
defm V_MAD_I32_I16 : VOP3_Realtriple_gfx11_gfx12<0x25a>;
1739+
defm V_MAD_U32_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x259, "v_mad_u32_u16">;
1740+
defm V_MAD_I32_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x25a, "v_mad_i32_i16">;
17411741
defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
17421742
defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
17431743
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2906,11 +2906,11 @@ v_mad_i16 v5.l, -1, exec_hi, src_scc
29062906
v_mad_i16 v5.l, src_scc, vcc_lo, -1
29072907
// GFX11: v_mad_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03]
29082908

2909-
v_mad_i32_i16 v5, v1, v2, v3
2910-
// GFX11: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
2909+
v_mad_i32_i16 v5, v1.l, v2.l, v3
2910+
// GFX11: v_mad_i32_i16 v5, v1.l, v2.l, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
29112911

2912-
v_mad_i32_i16 v5, v255, v255, s3
2913-
// GFX11: v_mad_i32_i16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
2912+
v_mad_i32_i16 v5, v255.l, v255.l, s3
2913+
// GFX11: v_mad_i32_i16 v5, v255.l, v255.l, s3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
29142914

29152915
v_mad_i32_i16 v5, s1, s2, v255
29162916
// GFX11: v_mad_i32_i16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07]
@@ -2951,6 +2951,18 @@ v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
29512951
v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
29522952
// GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
29532953

2954+
v_mad_i32_i16 v5, v1.h, v2.l, v3
2955+
// GFX11: v_mad_i32_i16 v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x5a,0xd6,0x01,0x05,0x0e,0x04]
2956+
2957+
v_mad_i32_i16 v5, v255.l, v255.h, s3
2958+
// GFX11: v_mad_i32_i16 v5, v255.l, v255.h, s3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x5a,0xd6,0xff,0xff,0x0f,0x00]
2959+
2960+
v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc
2961+
// GFX11: v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x00,0x5a,0xd6,0xfd,0xd4,0xf4,0x03]
2962+
2963+
v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 clamp
2964+
// GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 clamp ; encoding: [0xff,0x80,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
2965+
29542966
v_mad_i32_i24 v5, v1, v2, s3
29552967
// GFX11: v_mad_i32_i24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00]
29562968

@@ -3134,11 +3146,11 @@ v_mad_u16 v5.l, -1, exec_hi, src_scc
31343146
v_mad_u16 v5.l, src_scc, vcc_lo, -1
31353147
// GFX11: v_mad_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03]
31363148

3137-
v_mad_u32_u16 v5, v1, v2, v3
3138-
// GFX11: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
3149+
v_mad_u32_u16 v5, v1.l, v2.l, v3
3150+
// GFX11: v_mad_u32_u16 v5, v1.l, v2.l, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
31393151

3140-
v_mad_u32_u16 v5, v255, v255, s3
3141-
// GFX11: v_mad_u32_u16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
3152+
v_mad_u32_u16 v5, v255.l, v255.l, s3
3153+
// GFX11: v_mad_u32_u16 v5, v255.l, v255.l, s3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
31423154

31433155
v_mad_u32_u16 v5, s1, s2, v255
31443156
// GFX11: v_mad_u32_u16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07]
@@ -3179,6 +3191,18 @@ v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
31793191
v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
31803192
// GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
31813193

3194+
v_mad_u32_u16 v5, v1.h, v2.l, v3
3195+
// GFX11: v_mad_u32_u16 v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x59,0xd6,0x01,0x05,0x0e,0x04]
3196+
3197+
v_mad_u32_u16 v5, v255.l, v255.h, s3
3198+
// GFX11: v_mad_u32_u16 v5, v255.l, v255.h, s3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x59,0xd6,0xff,0xff,0x0f,0x00]
3199+
3200+
v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc
3201+
// GFX11: v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x00,0x59,0xd6,0xfd,0xd4,0xf4,0x03]
3202+
3203+
v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 clamp
3204+
// GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 clamp ; encoding: [0xff,0x80,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
3205+
31823206
v_mad_u32_u24 v5, v1, v2, s3
31833207
// GFX11: v_mad_u32_u24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00]
31843208

0 commit comments

Comments
 (0)