Skip to content

Commit d1ce0ba

Browse files
committed
true16 for V_CVT_PKNORM_I16/U16_F16 in MC
1 parent a1d71c3 commit d1ce0ba

13 files changed

+730 / -190 lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -639,8 +639,8 @@ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
639639
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
640640
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
641641

642-
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
643-
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
642+
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
643+
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
644644

645645
defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
646646

@@ -1752,8 +1752,8 @@ defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
17521752
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
17531753
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
17541754
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
1755-
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
1756-
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
1755+
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">;
1756+
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">;
17571757
defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;
17581758
defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">;
17591759
defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;

llvm/test/MC/AMDGPU/gfx11_asm_vop3.s

Lines changed: 46 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1259,11 +1259,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo
12591259
v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi
12601260
// GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
12611261

1262-
v_cvt_pk_norm_i16_f16 v5, v1, v2
1263-
// GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
1262+
v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l
1263+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
12641264

1265-
v_cvt_pk_norm_i16_f16 v5, v255, v255
1266-
// GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
1265+
v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l
1266+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
12671267

12681268
v_cvt_pk_norm_i16_f16 v5, s1, s2
12691269
// GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
@@ -1295,7 +1295,7 @@ v_cvt_pk_norm_i16_f16 v5, null, exec_lo
12951295
v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
12961296
// GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
12971297

1298-
v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
1298+
v_cvt_pk_norm_i16_f16 v5, 0.5, -m0
12991299
// GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40]
13001300

13011301
v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -1304,11 +1304,29 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
13041304
v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
13051305
// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
13061306

1307-
v_cvt_pk_norm_u16_f16 v5, v1, v2
1308-
// GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
1307+
v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l
1308+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
13091309

1310-
v_cvt_pk_norm_u16_f16 v5, v255, v255
1311-
// GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
1310+
v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h
1311+
// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
1312+
1313+
v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo|
1314+
// GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x12,0xd7,0xfd,0xd4,0x00,0x20]
1315+
1316+
v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi|
1317+
// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
1318+
1319+
v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l
1320+
// GFX11: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
1321+
1322+
v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h
1323+
// GFX11: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
1324+
1325+
v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l
1326+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
1327+
1328+
v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l
1329+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
13121330

13131331
v_cvt_pk_norm_u16_f16 v5, s1, s2
13141332
// GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
@@ -1340,7 +1358,7 @@ v_cvt_pk_norm_u16_f16 v5, null, exec_lo
13401358
v_cvt_pk_norm_u16_f16 v5, -1, exec_hi
13411359
// GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00]
13421360

1343-
v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
1361+
v_cvt_pk_norm_u16_f16 v5, 0.5, -m0
13441362
// GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40]
13451363

13461364
v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -1349,6 +1367,24 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
13491367
v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
13501368
// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
13511369

1370+
v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l
1371+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
1372+
1373+
v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h
1374+
// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
1375+
1376+
v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo|
1377+
// GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x13,0xd7,0xfd,0xd4,0x00,0x20]
1378+
1379+
v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi|
1380+
// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
1381+
1382+
v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l
1383+
// GFX11: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
1384+
1385+
v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h
1386+
// GFX11: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
1387+
13521388
v_cvt_pk_u16_f32 v5, v1, v2
13531389
// GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
13541390

0 commit comments

Comments
 (0)