Skip to content

Commit 4af3332

Browse files
authored
[AMDGPU][True16][MC] true16 for v_cvt_u32_u16 (llvm#120646)
Support true16 format for v_cvt_u32_u16 in MC
1 parent 40a00af commit 4af3332

28 files changed

+492
-235
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,7 @@ defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
10201020
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
10211021
defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x069, "v_not_b16">;
10221022
defm V_CVT_I32_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
1023-
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
1023+
defm V_CVT_U32_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
10241024

10251025
defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
10261026
defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;

llvm/test/MC/AMDGPU/gfx11_asm_vop1.s

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1706,11 +1706,11 @@ v_cvt_u32_f64 v5, src_scc
17061706
v_cvt_u32_f64 v255, 0xaf123456
17071707
// GFX11: v_cvt_u32_f64_e32 v255, 0xaf123456 ; encoding: [0xff,0x2a,0xfe,0x7f,0x56,0x34,0x12,0xaf]
17081708

1709-
v_cvt_u32_u16 v5, v1
1710-
// GFX11: v_cvt_u32_u16_e32 v5, v1 ; encoding: [0x01,0xd7,0x0a,0x7e]
1709+
v_cvt_u32_u16 v5, v1.l
1710+
// GFX11: v_cvt_u32_u16_e32 v5, v1.l ; encoding: [0x01,0xd7,0x0a,0x7e]
17111711

1712-
v_cvt_u32_u16 v5, v127
1713-
// GFX11: v_cvt_u32_u16_e32 v5, v127 ; encoding: [0x7f,0xd7,0x0a,0x7e]
1712+
v_cvt_u32_u16 v5, v127.l
1713+
// GFX11: v_cvt_u32_u16_e32 v5, v127.l ; encoding: [0x7f,0xd7,0x0a,0x7e]
17141714

17151715
v_cvt_u32_u16 v5, s1
17161716
// GFX11: v_cvt_u32_u16_e32 v5, s1 ; encoding: [0x01,0xd6,0x0a,0x7e]
@@ -1751,6 +1751,12 @@ v_cvt_u32_u16 v5, src_scc
17511751
v_cvt_u32_u16 v255, 0xfe0b
17521752
// GFX11: v_cvt_u32_u16_e32 v255, 0xfe0b ; encoding: [0xff,0xd6,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
17531753

1754+
v_cvt_u32_u16 v5, v1.h
1755+
// GFX11: v_cvt_u32_u16_e32 v5, v1.h ; encoding: [0x81,0xd7,0x0a,0x7e]
1756+
1757+
v_cvt_u32_u16 v5, v127.h
1758+
// GFX11: v_cvt_u32_u16_e32 v5, v127.h ; encoding: [0xff,0xd7,0x0a,0x7e]
1759+
17541760
v_exp_f16 v5.l, v1.l
17551761
// GFX11: v_exp_f16_e32 v5.l, v1.l ; encoding: [0x01,0xb1,0x0a,0x7e]
17561762

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,47 +1280,56 @@ v_cvt_u32_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
12801280
v_cvt_u32_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
12811281
// GFX11: v_cvt_u32_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x0e,0xfe,0x7f,0xff,0x6f,0x35,0x30]
12821282

1283-
v_cvt_u32_u16 v5, v1 quad_perm:[3,2,1,0]
1284-
// GFX11: v_cvt_u32_u16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff]
1283+
v_cvt_u32_u16 v5, v1.l quad_perm:[3,2,1,0]
1284+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1b,0x00,0xff]
12851285

1286-
v_cvt_u32_u16 v5, v1 quad_perm:[0,1,2,3]
1287-
// GFX11: v_cvt_u32_u16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff]
1286+
v_cvt_u32_u16 v5, v1.l quad_perm:[0,1,2,3]
1287+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0xe4,0x00,0xff]
12881288

1289-
v_cvt_u32_u16 v5, v1 row_mirror
1290-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff]
1289+
v_cvt_u32_u16 v5, v1.l row_mirror
1290+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x40,0x01,0xff]
12911291

1292-
v_cvt_u32_u16 v5, v1 row_half_mirror
1293-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff]
1292+
v_cvt_u32_u16 v5, v1.l row_half_mirror
1293+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x41,0x01,0xff]
12941294

1295-
v_cvt_u32_u16 v5, v1 row_shl:1
1296-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff]
1295+
v_cvt_u32_u16 v5, v1.l row_shl:1
1296+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x01,0x01,0xff]
12971297

1298-
v_cvt_u32_u16 v5, v1 row_shl:15
1299-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff]
1298+
v_cvt_u32_u16 v5, v1.l row_shl:15
1299+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x0f,0x01,0xff]
13001300

1301-
v_cvt_u32_u16 v5, v1 row_shr:1
1302-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff]
1301+
v_cvt_u32_u16 v5, v1.l row_shr:1
1302+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x11,0x01,0xff]
13031303

1304-
v_cvt_u32_u16 v5, v1 row_shr:15
1305-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff]
1304+
v_cvt_u32_u16 v5, v1.l row_shr:15
1305+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x1f,0x01,0xff]
13061306

1307-
v_cvt_u32_u16 v5, v1 row_ror:1
1308-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff]
1307+
v_cvt_u32_u16 v5, v1.l row_ror:1
1308+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x21,0x01,0xff]
13091309

1310-
v_cvt_u32_u16 v5, v1 row_ror:15
1311-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff]
1310+
v_cvt_u32_u16 v5, v1.l row_ror:15
1311+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x2f,0x01,0xff]
13121312

1313-
v_cvt_u32_u16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
1314-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff]
1313+
v_cvt_u32_u16 v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf
1314+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x50,0x01,0xff]
13151315

1316-
v_cvt_u32_u16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
1317-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01]
1316+
v_cvt_u32_u16 v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1
1317+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x5f,0x01,0x01]
13181318

1319-
v_cvt_u32_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
1320-
// GFX11: v_cvt_u32_u16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13]
1319+
v_cvt_u32_u16 v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
1320+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x01,0x60,0x09,0x13]
13211321

1322-
v_cvt_u32_u16 v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1323-
// GFX11: v_cvt_u32_u16_dpp v255, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x05,0x30]
1322+
v_cvt_u32_u16 v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
1323+
// GFX11: v_cvt_u32_u16_dpp v255, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0x7f,0x6f,0x05,0x30]
1324+
1325+
v_cvt_u32_u16 v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
1326+
// GFX11: v_cvt_u32_u16_dpp v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x7f,0x5f,0x01,0x01]
1327+
1328+
v_cvt_u32_u16 v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
1329+
// GFX11: v_cvt_u32_u16_dpp v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xd6,0x0a,0x7e,0x81,0x60,0x09,0x13]
1330+
1331+
v_cvt_u32_u16 v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1332+
// GFX11: v_cvt_u32_u16_dpp v255, v127.h row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xd6,0xfe,0x7f,0xff,0x6f,0x05,0x30]
13241333

13251334
v_exp_f16 v5.l, v1.l quad_perm:[3,2,1,0]
13261335
// GFX11: v_exp_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -317,14 +317,23 @@ v_cvt_u32_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
317317
v_cvt_u32_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
318318
// GFX11: v_cvt_u32_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x0e,0xfe,0x7f,0xff,0x00,0x00,0x00]
319319

320-
v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
321-
// GFX11: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05]
320+
v_cvt_u32_u16 v5, v1.l dpp8:[7,6,5,4,3,2,1,0]
321+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05]
322322

323-
v_cvt_u32_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
324-
// GFX11: v_cvt_u32_u16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05]
323+
v_cvt_u32_u16 v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1
324+
// GFX11: v_cvt_u32_u16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x01,0x77,0x39,0x05]
325325

326-
v_cvt_u32_u16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
327-
// GFX11: v_cvt_u32_u16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00]
326+
v_cvt_u32_u16 v255, v127.l dpp8:[0,0,0,0,0,0,0,0]
327+
// GFX11: v_cvt_u32_u16_dpp v255, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0x7f,0x00,0x00,0x00]
328+
329+
v_cvt_u32_u16 v5, v127.l dpp8:[7,6,5,4,3,2,1,0]
330+
// GFX11: v_cvt_u32_u16_dpp v5, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd6,0x0a,0x7e,0x7f,0x77,0x39,0x05]
331+
332+
v_cvt_u32_u16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1
333+
// GFX11: v_cvt_u32_u16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xd6,0x0a,0x7e,0x81,0x77,0x39,0x05]
334+
335+
v_cvt_u32_u16 v255, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
336+
// GFX11: v_cvt_u32_u16_dpp v255, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xd6,0xfe,0x7f,0xff,0x00,0x00,0x00]
328337

329338
v_exp_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0]
330339
// GFX11: v_exp_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb0,0x0a,0x7e,0x01,0x77,0x39,0x05]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,24 @@ v_cvt_u32_u16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
431431
v_cvt_u32_u16_e32 v5, v199 quad_perm:[3,2,1,0]
432432
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
433433

434+
v_cvt_u32_u16_e32 v5.h, v199.h
435+
// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction
436+
437+
v_cvt_u32_u16_e32 v5.h, v199.h dpp8:[7,6,5,4,3,2,1,0]
438+
// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction
439+
440+
v_cvt_u32_u16_e32 v5.h, v199.h quad_perm:[3,2,1,0]
441+
// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction
442+
443+
v_cvt_u32_u16_e32 v5.l, v199.l
444+
// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction
445+
446+
v_cvt_u32_u16_e32 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0]
447+
// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction
448+
449+
v_cvt_u32_u16_e32 v5.l, v199.l quad_perm:[3,2,1,0]
450+
// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction
451+
434452
v_exp_f16_e32 v128.h, 0xfe0b
435453
// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction
436454

llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,14 +1142,23 @@ v_cvt_u16_f16 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0]
11421142
v_cvt_u16_f16 v5.l, v199.l quad_perm:[3,2,1,0]
11431143
// GFX11: v_cvt_u16_f16_e64_dpp v5.l, v199.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff]
11441144

1145-
v_cvt_u32_u16 v5, v199
1146-
// GFX11: v_cvt_u32_u16_e64 v5, v199 ; encoding: [0x05,0x00,0xeb,0xd5,0xc7,0x01,0x00,0x00]
1145+
v_cvt_u32_u16 v5, v199.h
1146+
// GFX11: v_cvt_u32_u16_e64 v5, v199.h op_sel:[1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xc7,0x01,0x00,0x00]
11471147

1148-
v_cvt_u32_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
1149-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v199 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05]
1148+
v_cvt_u32_u16 v5, v199.h dpp8:[7,6,5,4,3,2,1,0]
1149+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05]
11501150

1151-
v_cvt_u32_u16 v5, v199 quad_perm:[3,2,1,0]
1152-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v199 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff]
1151+
v_cvt_u32_u16 v5, v199.h quad_perm:[3,2,1,0]
1152+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff]
1153+
1154+
v_cvt_u32_u16 v5, v199.l
1155+
// GFX11: v_cvt_u32_u16_e64 v5, v199.l ; encoding: [0x05,0x00,0xeb,0xd5,0xc7,0x01,0x00,0x00]
1156+
1157+
v_cvt_u32_u16 v5, v199.l dpp8:[7,6,5,4,3,2,1,0]
1158+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xc7,0x77,0x39,0x05]
1159+
1160+
v_cvt_u32_u16 v5, v199.l quad_perm:[3,2,1,0]
1161+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v199.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff]
11531162

11541163
v_exp_f16 v128, 0xfe0b
11551164
// GFX11: v_exp_f16_e64 v128, 0xfe0b ; encoding: [0x80,0x00,0xd8,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00]

llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,47 +1342,50 @@ v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
13421342
v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
13431343
// GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x05,0x30]
13441344

1345-
v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
1346-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
1345+
v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0]
1346+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
13471347

1348-
v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
1349-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
1348+
v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3]
1349+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
13501350

1351-
v_cvt_u32_u16_e64_dpp v5, v1 row_mirror
1352-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
1351+
v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror
1352+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
13531353

1354-
v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror
1355-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
1354+
v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror
1355+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
13561356

1357-
v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1
1358-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
1357+
v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1
1358+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
13591359

1360-
v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15
1361-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
1360+
v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15
1361+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
13621362

1363-
v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1
1364-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
1363+
v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1
1364+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
13651365

1366-
v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15
1367-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
1366+
v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15
1367+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
13681368

1369-
v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1
1370-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
1369+
v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1
1370+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
13711371

1372-
v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15
1373-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
1372+
v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15
1373+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
13741374

1375-
v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
1376-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
1375+
v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf
1376+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
13771377

1378-
v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
1379-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
1378+
v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1
1379+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
13801380

1381-
v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
1382-
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
1381+
v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
1382+
// GFX11: v_cvt_u32_u16_e64_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
13831383

1384-
v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1385-
// GFX11: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
1384+
v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1385+
// GFX11: v_cvt_u32_u16_e64_dpp v255, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
1386+
1387+
v_cvt_u32_u16_e64_dpp v255, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
1388+
// GFX11: [0xff,0x08,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
13861389

13871390
v_exp_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0]
13881391
// GFX11: v_exp_f16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]

0 commit comments

Comments
 (0)