Skip to content

Commit e5acb16

Browse files
authored
[AMDGPU][True16][MC] true16 for v_trunc_f16 (#120693)
Support true16 format for v_trunc_f16 in MC
1 parent: 322f16e; commit: e5acb16

29 files changed, 1074 insertions(+), 465 deletions(-)

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1042,7 +1042,7 @@ defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f1
10421042
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
10431043
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
10441044
defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
1045-
defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
1045+
defm V_TRUNC_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05d, "v_trunc_f16">;
10461046
defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
10471047
defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
10481048
defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;

llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
33
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
44
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
5+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
56

67
declare half @llvm.trunc.f16(half %a)
78
declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)
@@ -62,6 +63,24 @@ define amdgpu_kernel void @trunc_f16(
6263
; GFX11-NEXT: v_trunc_f16_e32 v0, v0
6364
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
6465
; GFX11-NEXT: s_endpgm
66+
;
67+
; GFX12-LABEL: trunc_f16:
68+
; GFX12: ; %bb.0: ; %entry
69+
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
70+
; GFX12-NEXT: s_mov_b32 s6, -1
71+
; GFX12-NEXT: s_mov_b32 s7, 0x31016000
72+
; GFX12-NEXT: s_mov_b32 s10, s6
73+
; GFX12-NEXT: s_mov_b32 s11, s7
74+
; GFX12-NEXT: s_wait_kmcnt 0x0
75+
; GFX12-NEXT: s_mov_b32 s8, s2
76+
; GFX12-NEXT: s_mov_b32 s9, s3
77+
; GFX12-NEXT: s_mov_b32 s4, s0
78+
; GFX12-NEXT: buffer_load_u16 v0, off, s[8:11], null
79+
; GFX12-NEXT: s_mov_b32 s5, s1
80+
; GFX12-NEXT: s_wait_loadcnt 0x0
81+
; GFX12-NEXT: v_trunc_f16_e32 v0, v0
82+
; GFX12-NEXT: buffer_store_b16 v0, off, s[4:7], null
83+
; GFX12-NEXT: s_endpgm
6584
ptr addrspace(1) %r,
6685
ptr addrspace(1) %a) {
6786
entry:
@@ -147,6 +166,28 @@ define amdgpu_kernel void @trunc_v2f16(
147166
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
148167
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
149168
; GFX11-NEXT: s_endpgm
169+
;
170+
; GFX12-LABEL: trunc_v2f16:
171+
; GFX12: ; %bb.0: ; %entry
172+
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
173+
; GFX12-NEXT: s_mov_b32 s6, -1
174+
; GFX12-NEXT: s_mov_b32 s7, 0x31016000
175+
; GFX12-NEXT: s_mov_b32 s10, s6
176+
; GFX12-NEXT: s_mov_b32 s11, s7
177+
; GFX12-NEXT: s_wait_kmcnt 0x0
178+
; GFX12-NEXT: s_mov_b32 s8, s2
179+
; GFX12-NEXT: s_mov_b32 s9, s3
180+
; GFX12-NEXT: s_mov_b32 s4, s0
181+
; GFX12-NEXT: buffer_load_b32 v0, off, s[8:11], null
182+
; GFX12-NEXT: s_mov_b32 s5, s1
183+
; GFX12-NEXT: s_wait_loadcnt 0x0
184+
; GFX12-NEXT: v_lshrrev_b32_e32 v1, 16, v0
185+
; GFX12-NEXT: v_trunc_f16_e32 v0, v0
186+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
187+
; GFX12-NEXT: v_trunc_f16_e32 v1, v1
188+
; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1
189+
; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null
190+
; GFX12-NEXT: s_endpgm
150191
ptr addrspace(1) %r,
151192
ptr addrspace(1) %a) {
152193
entry:

llvm/test/MC/AMDGPU/gfx11_asm_vop1.s

Lines changed: 45 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -3533,50 +3533,65 @@ v_swaprel_b32 v5, v1
35333533
v_swaprel_b32 v255, v255
35343534
// GFX11: v_swaprel_b32 v255, v255 ; encoding: [0xff,0xd1,0xfe,0x7f]
35353535

3536-
v_trunc_f16 v5, v1
3537-
// GFX11: v_trunc_f16_e32 v5, v1 ; encoding: [0x01,0xbb,0x0a,0x7e]
3536+
v_trunc_f16 v5.l, v1.l
3537+
// GFX11: v_trunc_f16_e32 v5.l, v1.l ; encoding: [0x01,0xbb,0x0a,0x7e]
35383538

3539-
v_trunc_f16 v5, v127
3540-
// GFX11: v_trunc_f16_e32 v5, v127 ; encoding: [0x7f,0xbb,0x0a,0x7e]
3539+
v_trunc_f16 v5.l, v127.l
3540+
// GFX11: v_trunc_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xbb,0x0a,0x7e]
35413541

3542-
v_trunc_f16 v5, s1
3543-
// GFX11: v_trunc_f16_e32 v5, s1 ; encoding: [0x01,0xba,0x0a,0x7e]
3542+
v_trunc_f16 v5.l, s1
3543+
// GFX11: v_trunc_f16_e32 v5.l, s1 ; encoding: [0x01,0xba,0x0a,0x7e]
35443544

3545-
v_trunc_f16 v5, s105
3546-
// GFX11: v_trunc_f16_e32 v5, s105 ; encoding: [0x69,0xba,0x0a,0x7e]
3545+
v_trunc_f16 v5.l, s105
3546+
// GFX11: v_trunc_f16_e32 v5.l, s105 ; encoding: [0x69,0xba,0x0a,0x7e]
35473547

3548-
v_trunc_f16 v5, vcc_lo
3549-
// GFX11: v_trunc_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xba,0x0a,0x7e]
3548+
v_trunc_f16 v5.l, vcc_lo
3549+
// GFX11: v_trunc_f16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xba,0x0a,0x7e]
35503550

3551-
v_trunc_f16 v5, vcc_hi
3552-
// GFX11: v_trunc_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xba,0x0a,0x7e]
3551+
v_trunc_f16 v5.l, vcc_hi
3552+
// GFX11: v_trunc_f16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xba,0x0a,0x7e]
35533553

3554-
v_trunc_f16 v5, ttmp15
3555-
// GFX11: v_trunc_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xba,0x0a,0x7e]
3554+
v_trunc_f16 v5.l, ttmp15
3555+
// GFX11: v_trunc_f16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xba,0x0a,0x7e]
35563556

3557-
v_trunc_f16 v5, m0
3558-
// GFX11: v_trunc_f16_e32 v5, m0 ; encoding: [0x7d,0xba,0x0a,0x7e]
3557+
v_trunc_f16 v5.l, m0
3558+
// GFX11: v_trunc_f16_e32 v5.l, m0 ; encoding: [0x7d,0xba,0x0a,0x7e]
35593559

3560-
v_trunc_f16 v5, exec_lo
3561-
// GFX11: v_trunc_f16_e32 v5, exec_lo ; encoding: [0x7e,0xba,0x0a,0x7e]
3560+
v_trunc_f16 v5.l, exec_lo
3561+
// GFX11: v_trunc_f16_e32 v5.l, exec_lo ; encoding: [0x7e,0xba,0x0a,0x7e]
35623562

3563-
v_trunc_f16 v5, exec_hi
3564-
// GFX11: v_trunc_f16_e32 v5, exec_hi ; encoding: [0x7f,0xba,0x0a,0x7e]
3563+
v_trunc_f16 v5.l, exec_hi
3564+
// GFX11: v_trunc_f16_e32 v5.l, exec_hi ; encoding: [0x7f,0xba,0x0a,0x7e]
35653565

3566-
v_trunc_f16 v5, null
3567-
// GFX11: v_trunc_f16_e32 v5, null ; encoding: [0x7c,0xba,0x0a,0x7e]
3566+
v_trunc_f16 v5.l, null
3567+
// GFX11: v_trunc_f16_e32 v5.l, null ; encoding: [0x7c,0xba,0x0a,0x7e]
35683568

3569-
v_trunc_f16 v5, -1
3570-
// GFX11: v_trunc_f16_e32 v5, -1 ; encoding: [0xc1,0xba,0x0a,0x7e]
3569+
v_trunc_f16 v5.l, -1
3570+
// GFX11: v_trunc_f16_e32 v5.l, -1 ; encoding: [0xc1,0xba,0x0a,0x7e]
35713571

3572-
v_trunc_f16 v5, 0.5
3573-
// GFX11: v_trunc_f16_e32 v5, 0.5 ; encoding: [0xf0,0xba,0x0a,0x7e]
3572+
v_trunc_f16 v5.l, 0.5
3573+
// GFX11: v_trunc_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xba,0x0a,0x7e]
35743574

3575-
v_trunc_f16 v5, src_scc
3576-
// GFX11: v_trunc_f16_e32 v5, src_scc ; encoding: [0xfd,0xba,0x0a,0x7e]
3575+
v_trunc_f16 v5.l, src_scc
3576+
// GFX11: v_trunc_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xba,0x0a,0x7e]
35773577

3578-
v_trunc_f16 v127, 0xfe0b
3579-
// GFX11: v_trunc_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xba,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
3578+
v_trunc_f16 v127.l, 0xfe0b
3579+
// GFX11: v_trunc_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xba,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
3580+
3581+
v_trunc_f16 v5.l, v1.h
3582+
// GFX11: v_trunc_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbb,0x0a,0x7e]
3583+
3584+
v_trunc_f16 v5.l, v127.h
3585+
// GFX11: v_trunc_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbb,0x0a,0x7e]
3586+
3587+
v_trunc_f16 v127.l, 0.5
3588+
// GFX11: v_trunc_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xba,0xfe,0x7e]
3589+
3590+
v_trunc_f16 v5.h, src_scc
3591+
// GFX11: v_trunc_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xba,0x0a,0x7f]
3592+
3593+
v_trunc_f16 v127.h, 0xfe0b
3594+
// GFX11: v_trunc_f16_e32 v127.h, 0xfe0b ; encoding: [0xff,0xba,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
35803595

35813596
v_trunc_f32 v5, v1
35823597
// GFX11: v_trunc_f32_e32 v5, v1 ; encoding: [0x01,0x43,0x0a,0x7e]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s

Lines changed: 37 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -2741,47 +2741,56 @@ v_sqrt_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
27412741
v_sqrt_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
27422742
// GFX11: v_sqrt_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x66,0xfe,0x7f,0xff,0x6f,0x35,0x30]
27432743

2744-
v_trunc_f16 v5, v1 quad_perm:[3,2,1,0]
2745-
// GFX11: v_trunc_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x00,0xff]
2744+
v_trunc_f16 v5.l, v1.l quad_perm:[3,2,1,0]
2745+
// GFX11: v_trunc_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1b,0x00,0xff]
27462746

2747-
v_trunc_f16 v5, v1 quad_perm:[0,1,2,3]
2748-
// GFX11: v_trunc_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0xe4,0x00,0xff]
2747+
v_trunc_f16 v5.l, v1.l quad_perm:[0,1,2,3]
2748+
// GFX11: v_trunc_f16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0xe4,0x00,0xff]
27492749

2750-
v_trunc_f16 v5, v1 row_mirror
2751-
// GFX11: v_trunc_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x40,0x01,0xff]
2750+
v_trunc_f16 v5.l, v1.l row_mirror
2751+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x40,0x01,0xff]
27522752

2753-
v_trunc_f16 v5, v1 row_half_mirror
2754-
// GFX11: v_trunc_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x41,0x01,0xff]
2753+
v_trunc_f16 v5.l, v1.l row_half_mirror
2754+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x41,0x01,0xff]
27552755

2756-
v_trunc_f16 v5, v1 row_shl:1
2757-
// GFX11: v_trunc_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x01,0x01,0xff]
2756+
v_trunc_f16 v5.l, v1.l row_shl:1
2757+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x01,0x01,0xff]
27582758

2759-
v_trunc_f16 v5, v1 row_shl:15
2760-
// GFX11: v_trunc_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x0f,0x01,0xff]
2759+
v_trunc_f16 v5.l, v1.l row_shl:15
2760+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x0f,0x01,0xff]
27612761

2762-
v_trunc_f16 v5, v1 row_shr:1
2763-
// GFX11: v_trunc_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x11,0x01,0xff]
2762+
v_trunc_f16 v5.l, v1.l row_shr:1
2763+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x11,0x01,0xff]
27642764

2765-
v_trunc_f16 v5, v1 row_shr:15
2766-
// GFX11: v_trunc_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1f,0x01,0xff]
2765+
v_trunc_f16 v5.l, v1.l row_shr:15
2766+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x1f,0x01,0xff]
27672767

2768-
v_trunc_f16 v5, v1 row_ror:1
2769-
// GFX11: v_trunc_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x21,0x01,0xff]
2768+
v_trunc_f16 v5.l, v1.l row_ror:1
2769+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x21,0x01,0xff]
27702770

2771-
v_trunc_f16 v5, v1 row_ror:15
2772-
// GFX11: v_trunc_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x2f,0x01,0xff]
2771+
v_trunc_f16 v5.l, v1.l row_ror:15
2772+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x2f,0x01,0xff]
27732773

2774-
v_trunc_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
2775-
// GFX11: v_trunc_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x50,0x01,0xff]
2774+
v_trunc_f16 v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf
2775+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x50,0x01,0xff]
27762776

2777-
v_trunc_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
2778-
// GFX11: v_trunc_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x5f,0x01,0x01]
2777+
v_trunc_f16 v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1
2778+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x5f,0x01,0x01]
27792779

2780-
v_trunc_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
2781-
// GFX11: v_trunc_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x60,0x09,0x13]
2780+
v_trunc_f16 v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
2781+
// GFX11: v_trunc_f16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xba,0x0a,0x7e,0x01,0x60,0x09,0x13]
27822782

2783-
v_trunc_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
2784-
// GFX11: v_trunc_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xba,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
2783+
v_trunc_f16 v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
2784+
// GFX11: v_trunc_f16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xba,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
2785+
2786+
v_trunc_f16 v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
2787+
// GFX11: v_trunc_f16_dpp v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xba,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
2788+
2789+
v_trunc_f16 v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
2790+
// GFX11: v_trunc_f16_dpp v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xba,0x0a,0x7f,0x81,0x60,0x09,0x13]
2791+
2792+
v_trunc_f16 v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
2793+
// GFX11: v_trunc_f16_dpp v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xba,0xfe,0x7f,0xff,0x6f,0x35,0x30]
27852794

27862795
v_trunc_f32 v5, v1 quad_perm:[3,2,1,0]
27872796
// GFX11: v_trunc_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x42,0x0a,0x7e,0x01,0x1b,0x00,0xff]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -644,14 +644,23 @@ v_sqrt_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
644644
v_sqrt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
645645
// GFX11: v_sqrt_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x66,0xfe,0x7f,0xff,0x00,0x00,0x00]
646646

647-
v_trunc_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
648-
// GFX11: v_trunc_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05]
647+
v_trunc_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0]
648+
// GFX11: v_trunc_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05]
649649

650-
v_trunc_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
651-
// GFX11: v_trunc_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05]
650+
v_trunc_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1
651+
// GFX11: v_trunc_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xba,0x0a,0x7e,0x01,0x77,0x39,0x05]
652652

653-
v_trunc_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
654-
// GFX11: v_trunc_f16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xba,0xfe,0x7e,0x7f,0x00,0x00,0x00]
653+
v_trunc_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
654+
// GFX11: v_trunc_f16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xba,0xfe,0x7e,0x7f,0x00,0x00,0x00]
655+
656+
v_trunc_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
657+
// GFX11: v_trunc_f16_dpp v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xba,0xfe,0x7e,0x7f,0x77,0x39,0x05]
658+
659+
v_trunc_f16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1
660+
// GFX11: v_trunc_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xba,0x0a,0x7f,0x81,0x77,0x39,0x05]
661+
662+
v_trunc_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
663+
// GFX11: v_trunc_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xba,0xfe,0x7f,0xff,0x00,0x00,0x00]
655664

656665
v_trunc_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
657666
// GFX11: v_trunc_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x42,0x0a,0x7e,0x01,0x77,0x39,0x05]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s

Lines changed: 43 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
22
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
33
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
44

@@ -812,6 +812,12 @@ v_swap_b16_e32 v128.l, v0.l
812812
v_trunc_f16_e32 v128, 0xfe0b
813813
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
814814

815+
v_trunc_f16_e32 v128.h, 0xfe0b
816+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
817+
818+
v_trunc_f16_e32 v128.l, 0xfe0b
819+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
820+
815821
v_trunc_f16_e32 v255, v1
816822
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
817823

@@ -821,6 +827,24 @@ v_trunc_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0]
821827
v_trunc_f16_e32 v255, v1 quad_perm:[3,2,1,0]
822828
// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
823829

830+
v_trunc_f16_e32 v255.h, v1.h
831+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
832+
833+
v_trunc_f16_e32 v255.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
834+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
835+
836+
v_trunc_f16_e32 v255.h, v1.h quad_perm:[3,2,1,0]
837+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
838+
839+
v_trunc_f16_e32 v255.l, v1.l
840+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
841+
842+
v_trunc_f16_e32 v255.l, v1.l dpp8:[7,6,5,4,3,2,1,0]
843+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
844+
845+
v_trunc_f16_e32 v255.l, v1.l quad_perm:[3,2,1,0]
846+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
847+
824848
v_trunc_f16_e32 v5, v199
825849
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
826850

@@ -829,3 +853,21 @@ v_trunc_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
829853

830854
v_trunc_f16_e32 v5, v199 quad_perm:[3,2,1,0]
831855
// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
856+
857+
v_trunc_f16_e32 v5.h, v199.h
858+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
859+
860+
v_trunc_f16_e32 v5.h, v199.h dpp8:[7,6,5,4,3,2,1,0]
861+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
862+
863+
v_trunc_f16_e32 v5.h, v199.h quad_perm:[3,2,1,0]
864+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
865+
866+
v_trunc_f16_e32 v5.l, v199.l
867+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
868+
869+
v_trunc_f16_e32 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0]
870+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
871+
872+
v_trunc_f16_e32 v5.l, v199.l quad_perm:[3,2,1,0]
873+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction

0 commit comments

Comments
 (0)