Skip to content

Commit 322f16e

Browse files
authored
[AMDGPU][True16][MC] true16 for v_sat_pk_u8_i16 (#120634)
Support the true16 format for v_sat_pk_u8_i16 in MC.
1 parent 3c700d1 commit 322f16e

28 files changed

+572
-259
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,7 @@ defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
10471047
defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
10481048
defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
10491049
defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
1050-
defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
1050+
defm V_SAT_PK_U8_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
10511051
defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
10521052
defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
10531053

llvm/test/MC/AMDGPU/gfx11_asm_vop1.s

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3236,50 +3236,59 @@ v_rsq_f64 v[5:6], src_scc
32363236
v_rsq_f64 v[254:255], 0xaf123456
32373237
// GFX11: v_rsq_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x62,0xfc,0x7f,0x56,0x34,0x12,0xaf]
32383238

3239-
v_sat_pk_u8_i16 v5, v1
3240-
// GFX11: v_sat_pk_u8_i16_e32 v5, v1 ; encoding: [0x01,0xc5,0x0a,0x7e]
3239+
v_sat_pk_u8_i16 v5.l, v1
3240+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, v1 ; encoding: [0x01,0xc5,0x0a,0x7e]
32413241

3242-
v_sat_pk_u8_i16 v5, v255
3243-
// GFX11: v_sat_pk_u8_i16_e32 v5, v255 ; encoding: [0xff,0xc5,0x0a,0x7e]
3242+
v_sat_pk_u8_i16 v5.l, v255
3243+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, v255 ; encoding: [0xff,0xc5,0x0a,0x7e]
32443244

3245-
v_sat_pk_u8_i16 v5, s1
3246-
// GFX11: v_sat_pk_u8_i16_e32 v5, s1 ; encoding: [0x01,0xc4,0x0a,0x7e]
3245+
v_sat_pk_u8_i16 v5.l, s1
3246+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, s1 ; encoding: [0x01,0xc4,0x0a,0x7e]
32473247

3248-
v_sat_pk_u8_i16 v5, s105
3249-
// GFX11: v_sat_pk_u8_i16_e32 v5, s105 ; encoding: [0x69,0xc4,0x0a,0x7e]
3248+
v_sat_pk_u8_i16 v5.l, s105
3249+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, s105 ; encoding: [0x69,0xc4,0x0a,0x7e]
32503250

3251-
v_sat_pk_u8_i16 v5, vcc_lo
3252-
// GFX11: v_sat_pk_u8_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e]
3251+
v_sat_pk_u8_i16 v5.l, vcc_lo
3252+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e]
32533253

3254-
v_sat_pk_u8_i16 v5, vcc_hi
3255-
// GFX11: v_sat_pk_u8_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e]
3254+
v_sat_pk_u8_i16 v5.l, vcc_hi
3255+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e]
32563256

3257-
v_sat_pk_u8_i16 v5, ttmp15
3258-
// GFX11: v_sat_pk_u8_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e]
3257+
v_sat_pk_u8_i16 v5.l, ttmp15
3258+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e]
32593259

3260-
v_sat_pk_u8_i16 v5, m0
3261-
// GFX11: v_sat_pk_u8_i16_e32 v5, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e]
3260+
v_sat_pk_u8_i16 v5.l, m0
3261+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e]
32623262

3263-
v_sat_pk_u8_i16 v5, exec_lo
3264-
// GFX11: v_sat_pk_u8_i16_e32 v5, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e]
3263+
v_sat_pk_u8_i16 v5.l, exec_lo
3264+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e]
32653265

3266-
v_sat_pk_u8_i16 v5, exec_hi
3267-
// GFX11: v_sat_pk_u8_i16_e32 v5, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e]
3266+
v_sat_pk_u8_i16 v5.l, exec_hi
3267+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e]
32683268

3269-
v_sat_pk_u8_i16 v5, null
3270-
// GFX11: v_sat_pk_u8_i16_e32 v5, null ; encoding: [0x7c,0xc4,0x0a,0x7e]
3269+
v_sat_pk_u8_i16 v5.l, null
3270+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, null ; encoding: [0x7c,0xc4,0x0a,0x7e]
32713271

3272-
v_sat_pk_u8_i16 v5, -1
3273-
// GFX11: v_sat_pk_u8_i16_e32 v5, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e]
3272+
v_sat_pk_u8_i16 v5.l, -1
3273+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e]
32743274

3275-
v_sat_pk_u8_i16 v5, 0.5
3276-
// GFX11: v_sat_pk_u8_i16_e32 v5, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e]
3275+
v_sat_pk_u8_i16 v5.l, 0.5
3276+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e]
32773277

3278-
v_sat_pk_u8_i16 v5, src_scc
3279-
// GFX11: v_sat_pk_u8_i16_e32 v5, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e]
3278+
v_sat_pk_u8_i16 v5.l, src_scc
3279+
// GFX11: v_sat_pk_u8_i16_e32 v5.l, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e]
32803280

3281-
v_sat_pk_u8_i16 v127, 0xfe0b
3282-
// GFX11: v_sat_pk_u8_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
3281+
v_sat_pk_u8_i16 v127.l, 0xfe0b
3282+
// GFX11: v_sat_pk_u8_i16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
3283+
3284+
v_sat_pk_u8_i16 v127.l, 0.5
3285+
// GFX11: v_sat_pk_u8_i16_e32 v127.l, 0.5 ; encoding: [0xf0,0xc4,0xfe,0x7e]
3286+
3287+
v_sat_pk_u8_i16 v5.h, src_scc
3288+
// GFX11: v_sat_pk_u8_i16_e32 v5.h, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7f]
3289+
3290+
v_sat_pk_u8_i16 v127.h, 0xfe0b
3291+
// GFX11: v_sat_pk_u8_i16_e32 v127.h, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
32833292

32843293
v_sin_f16 v5, v1
32853294
// GFX11: v_sin_f16_e32 v5, v1 ; encoding: [0x01,0xc1,0x0a,0x7e]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2522,47 +2522,56 @@ v_rsq_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
25222522
v_rsq_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
25232523
// GFX11: v_rsq_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x5c,0xfe,0x7f,0xff,0x6f,0x35,0x30]
25242524

2525-
v_sat_pk_u8_i16 v5, v1 quad_perm:[3,2,1,0]
2526-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
2525+
v_sat_pk_u8_i16 v5.l, v1 quad_perm:[3,2,1,0]
2526+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
25272527

2528-
v_sat_pk_u8_i16 v5, v1 quad_perm:[0,1,2,3]
2529-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
2528+
v_sat_pk_u8_i16 v5.l, v1 quad_perm:[0,1,2,3]
2529+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
25302530

2531-
v_sat_pk_u8_i16 v5, v1 row_mirror
2532-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff]
2531+
v_sat_pk_u8_i16 v5.l, v1 row_mirror
2532+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff]
25332533

2534-
v_sat_pk_u8_i16 v5, v1 row_half_mirror
2535-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff]
2534+
v_sat_pk_u8_i16 v5.l, v1 row_half_mirror
2535+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff]
25362536

2537-
v_sat_pk_u8_i16 v5, v1 row_shl:1
2538-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff]
2537+
v_sat_pk_u8_i16 v5.l, v1 row_shl:1
2538+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff]
25392539

2540-
v_sat_pk_u8_i16 v5, v1 row_shl:15
2541-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
2540+
v_sat_pk_u8_i16 v5.l, v1 row_shl:15
2541+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
25422542

2543-
v_sat_pk_u8_i16 v5, v1 row_shr:1
2544-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff]
2543+
v_sat_pk_u8_i16 v5.l, v1 row_shr:1
2544+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff]
25452545

2546-
v_sat_pk_u8_i16 v5, v1 row_shr:15
2547-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
2546+
v_sat_pk_u8_i16 v5.l, v1 row_shr:15
2547+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
25482548

2549-
v_sat_pk_u8_i16 v5, v1 row_ror:1
2550-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff]
2549+
v_sat_pk_u8_i16 v5.l, v1 row_ror:1
2550+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff]
25512551

2552-
v_sat_pk_u8_i16 v5, v1 row_ror:15
2553-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
2552+
v_sat_pk_u8_i16 v5.l, v1 row_ror:15
2553+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
25542554

2555-
v_sat_pk_u8_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
2556-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff]
2555+
v_sat_pk_u8_i16 v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf
2556+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff]
25572557

2558-
v_sat_pk_u8_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
2559-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
2558+
v_sat_pk_u8_i16 v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1
2559+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
25602560

2561-
v_sat_pk_u8_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
2562-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x09,0x13]
2561+
v_sat_pk_u8_i16 v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
2562+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x09,0x13]
25632563

2564-
v_sat_pk_u8_i16 v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
2565-
// GFX11: v_sat_pk_u8_i16_dpp v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x05,0x30]
2564+
v_sat_pk_u8_i16 v127.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
2565+
// GFX11: v_sat_pk_u8_i16_dpp v127.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x05,0x30]
2566+
2567+
v_sat_pk_u8_i16 v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1
2568+
// GFX11: v_sat_pk_u8_i16_dpp v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0x01,0x5f,0x01,0x01]
2569+
2570+
v_sat_pk_u8_i16 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
2571+
// GFX11: v_sat_pk_u8_i16_dpp v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xc4,0x0a,0x7f,0x01,0x60,0x09,0x13]
2572+
2573+
v_sat_pk_u8_i16 v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
2574+
// GFX11: v_sat_pk_u8_i16_dpp v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7f,0xff,0x6f,0x05,0x30]
25662575

25672576
v_sin_f16 v5, v1 quad_perm:[3,2,1,0]
25682577
// GFX11: v_sin_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -587,14 +587,23 @@ v_rsq_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
587587
v_rsq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
588588
// GFX11: v_rsq_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x5c,0xfe,0x7f,0xff,0x00,0x00,0x00]
589589

590-
v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
591-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05]
590+
v_sat_pk_u8_i16 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0]
591+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05]
592592

593-
v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
594-
// GFX11: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05]
593+
v_sat_pk_u8_i16 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
594+
// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05]
595595

596-
v_sat_pk_u8_i16 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
597-
// GFX11: v_sat_pk_u8_i16_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00]
596+
v_sat_pk_u8_i16 v127.l, v255 dpp8:[0,0,0,0,0,0,0,0]
597+
// GFX11: v_sat_pk_u8_i16_dpp v127.l, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00]
598+
599+
v_sat_pk_u8_i16 v127.l, v1 dpp8:[7,6,5,4,3,2,1,0]
600+
// GFX11: v_sat_pk_u8_i16_dpp v127.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0x01,0x77,0x39,0x05]
601+
602+
v_sat_pk_u8_i16 v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
603+
// GFX11: v_sat_pk_u8_i16_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05]
604+
605+
v_sat_pk_u8_i16 v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
606+
// GFX11: v_sat_pk_u8_i16_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00]
598607

599608
v_sin_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
600609
// GFX11: v_sin_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,24 @@ v_sat_pk_u8_i16_e32 v199, v5 dpp8:[7,6,5,4,3,2,1,0]
716716
v_sat_pk_u8_i16_e32 v199, v5 quad_perm:[3,2,1,0]
717717
// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
718718

719+
v_sat_pk_u8_i16_e32 v199.h, v5.h
720+
// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction
721+
722+
v_sat_pk_u8_i16_e32 v199.h, v5.h dpp8:[7,6,5,4,3,2,1,0]
723+
// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction
724+
725+
v_sat_pk_u8_i16_e32 v199.h, v5.h quad_perm:[3,2,1,0]
726+
// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction
727+
728+
v_sat_pk_u8_i16_e32 v199.l, v5.l
729+
// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction
730+
731+
v_sat_pk_u8_i16_e32 v199.l, v5.l dpp8:[7,6,5,4,3,2,1,0]
732+
// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction
733+
734+
v_sat_pk_u8_i16_e32 v199.l, v5.l quad_perm:[3,2,1,0]
735+
// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction
736+
719737
v_sin_f16_e32 v128, 0xfe0b
720738
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
721739

llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,14 +1802,23 @@ v_rsq_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
18021802
v_rsq_f16 v5, v199 quad_perm:[3,2,1,0]
18031803
// GFX11: v_rsq_f16_e64_dpp v5, v199 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff]
18041804

1805-
v_sat_pk_u8_i16 v199, v5
1806-
// GFX11: v_sat_pk_u8_i16_e64 v199, v5 ; encoding: [0xc7,0x00,0xe2,0xd5,0x05,0x01,0x00,0x00]
1805+
v_sat_pk_u8_i16 v199.h, v5
1806+
// GFX11: v_sat_pk_u8_i16_e64 v199.h, v5 op_sel:[0,1] ; encoding: [0xc7,0x40,0xe2,0xd5,0x05,0x01,0x00,0x00]
18071807

1808-
v_sat_pk_u8_i16 v199, v5 dpp8:[7,6,5,4,3,2,1,0]
1809-
// GFX11: v_sat_pk_u8_i16_e64_dpp v199, v5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xc7,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x05,0x77,0x39,0x05]
1808+
v_sat_pk_u8_i16 v199.h, v5 dpp8:[7,6,5,4,3,2,1,0]
1809+
// GFX11: v_sat_pk_u8_i16_e64_dpp v199.h, v5 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xc7,0x40,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x05,0x77,0x39,0x05]
18101810

1811-
v_sat_pk_u8_i16 v199, v5 quad_perm:[3,2,1,0]
1812-
// GFX11: v_sat_pk_u8_i16_e64_dpp v199, v5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff]
1811+
v_sat_pk_u8_i16 v199.h, v5 quad_perm:[3,2,1,0]
1812+
// GFX11: v_sat_pk_u8_i16_e64_dpp v199.h, v5 op_sel:[0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff]
1813+
1814+
v_sat_pk_u8_i16 v199.l, v5
1815+
// GFX11: v_sat_pk_u8_i16_e64 v199.l, v5 ; encoding: [0xc7,0x00,0xe2,0xd5,0x05,0x01,0x00,0x00]
1816+
1817+
v_sat_pk_u8_i16 v199.l, v5 dpp8:[7,6,5,4,3,2,1,0]
1818+
// GFX11: v_sat_pk_u8_i16_e64_dpp v199.l, v5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xc7,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x05,0x77,0x39,0x05]
1819+
1820+
v_sat_pk_u8_i16 v199.l, v5 quad_perm:[3,2,1,0]
1821+
// GFX11: v_sat_pk_u8_i16_e64_dpp v199.l, v5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff]
18131822

18141823
v_sin_f16 v128, 0xfe0b
18151824
// GFX11: v_sin_f16_e64 v128, 0xfe0b ; encoding: [0x80,0x00,0xe0,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00]

0 commit comments

Comments
 (0)