Skip to content

Commit d7acf03

Browse files
authored
[AMDGPU][True16][MC] true16 for v_rndne_f16 (#120691)
Support true16 format for v_rndne_f16 in MC
1 parent d85b22e commit d7acf03

29 files changed

+1071
-462
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1043,7 +1043,7 @@ defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f1
10431043
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
10441044
defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
10451045
defm V_TRUNC_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05d, "v_trunc_f16">;
1046-
defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
1046+
defm V_RNDNE_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05e, "v_rndne_f16">;
10471047
defm V_FRACT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05f, "v_fract_f16">;
10481048
defm V_SIN_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x060, "v_sin_f16">;
10491049
defm V_COS_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x061, "v_cos_f16">;

llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX89,VI %s
44
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX89,GFX9 %s
55
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11 %s
6+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX12 %s
67

78
declare half @llvm.rint.f16(half %a)
89
declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)
@@ -63,6 +64,24 @@ define amdgpu_kernel void @rint_f16(
6364
; GFX11-NEXT: v_rndne_f16_e32 v0, v0
6465
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
6566
; GFX11-NEXT: s_endpgm
67+
;
68+
; GFX12-LABEL: rint_f16:
69+
; GFX12: ; %bb.0: ; %entry
70+
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
71+
; GFX12-NEXT: s_mov_b32 s6, -1
72+
; GFX12-NEXT: s_mov_b32 s7, 0x31016000
73+
; GFX12-NEXT: s_mov_b32 s10, s6
74+
; GFX12-NEXT: s_mov_b32 s11, s7
75+
; GFX12-NEXT: s_wait_kmcnt 0x0
76+
; GFX12-NEXT: s_mov_b32 s8, s2
77+
; GFX12-NEXT: s_mov_b32 s9, s3
78+
; GFX12-NEXT: s_mov_b32 s4, s0
79+
; GFX12-NEXT: buffer_load_u16 v0, off, s[8:11], null
80+
; GFX12-NEXT: s_mov_b32 s5, s1
81+
; GFX12-NEXT: s_wait_loadcnt 0x0
82+
; GFX12-NEXT: v_rndne_f16_e32 v0, v0
83+
; GFX12-NEXT: buffer_store_b16 v0, off, s[4:7], null
84+
; GFX12-NEXT: s_endpgm
6685
ptr addrspace(1) %r,
6786
ptr addrspace(1) %a) {
6887
entry:
@@ -168,6 +187,28 @@ define amdgpu_kernel void @rint_v2f16(
168187
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
169188
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
170189
; GFX11-NEXT: s_endpgm
190+
;
191+
; GFX12-LABEL: rint_v2f16:
192+
; GFX12: ; %bb.0: ; %entry
193+
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
194+
; GFX12-NEXT: s_mov_b32 s6, -1
195+
; GFX12-NEXT: s_mov_b32 s7, 0x31016000
196+
; GFX12-NEXT: s_mov_b32 s10, s6
197+
; GFX12-NEXT: s_mov_b32 s11, s7
198+
; GFX12-NEXT: s_wait_kmcnt 0x0
199+
; GFX12-NEXT: s_mov_b32 s8, s2
200+
; GFX12-NEXT: s_mov_b32 s9, s3
201+
; GFX12-NEXT: s_mov_b32 s4, s0
202+
; GFX12-NEXT: buffer_load_b32 v0, off, s[8:11], null
203+
; GFX12-NEXT: s_mov_b32 s5, s1
204+
; GFX12-NEXT: s_wait_loadcnt 0x0
205+
; GFX12-NEXT: v_lshrrev_b32_e32 v1, 16, v0
206+
; GFX12-NEXT: v_rndne_f16_e32 v0, v0
207+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
208+
; GFX12-NEXT: v_rndne_f16_e32 v1, v1
209+
; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1
210+
; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null
211+
; GFX12-NEXT: s_endpgm
171212
ptr addrspace(1) %r,
172213
ptr addrspace(1) %a) {
173214
entry:

llvm/test/MC/AMDGPU/gfx11_asm_vop1.s

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3044,50 +3044,65 @@ v_readfirstlane_b32 ttmp15, v1
30443044
v_readfirstlane_b32 null, v255
30453045
// GFX11: v_readfirstlane_b32 null, v255 ; encoding: [0xff,0x05,0xf8,0x7e]
30463046

3047-
v_rndne_f16 v5, v1
3048-
// GFX11: v_rndne_f16_e32 v5, v1 ; encoding: [0x01,0xbd,0x0a,0x7e]
3047+
v_rndne_f16 v5.l, v1.l
3048+
// GFX11: v_rndne_f16_e32 v5.l, v1.l ; encoding: [0x01,0xbd,0x0a,0x7e]
30493049

3050-
v_rndne_f16 v5, v127
3051-
// GFX11: v_rndne_f16_e32 v5, v127 ; encoding: [0x7f,0xbd,0x0a,0x7e]
3050+
v_rndne_f16 v5.l, v127.l
3051+
// GFX11: v_rndne_f16_e32 v5.l, v127.l ; encoding: [0x7f,0xbd,0x0a,0x7e]
30523052

3053-
v_rndne_f16 v5, s1
3054-
// GFX11: v_rndne_f16_e32 v5, s1 ; encoding: [0x01,0xbc,0x0a,0x7e]
3053+
v_rndne_f16 v5.l, s1
3054+
// GFX11: v_rndne_f16_e32 v5.l, s1 ; encoding: [0x01,0xbc,0x0a,0x7e]
30553055

3056-
v_rndne_f16 v5, s105
3057-
// GFX11: v_rndne_f16_e32 v5, s105 ; encoding: [0x69,0xbc,0x0a,0x7e]
3056+
v_rndne_f16 v5.l, s105
3057+
// GFX11: v_rndne_f16_e32 v5.l, s105 ; encoding: [0x69,0xbc,0x0a,0x7e]
30583058

3059-
v_rndne_f16 v5, vcc_lo
3060-
// GFX11: v_rndne_f16_e32 v5, vcc_lo ; encoding: [0x6a,0xbc,0x0a,0x7e]
3059+
v_rndne_f16 v5.l, vcc_lo
3060+
// GFX11: v_rndne_f16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xbc,0x0a,0x7e]
30613061

3062-
v_rndne_f16 v5, vcc_hi
3063-
// GFX11: v_rndne_f16_e32 v5, vcc_hi ; encoding: [0x6b,0xbc,0x0a,0x7e]
3062+
v_rndne_f16 v5.l, vcc_hi
3063+
// GFX11: v_rndne_f16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xbc,0x0a,0x7e]
30643064

3065-
v_rndne_f16 v5, ttmp15
3066-
// GFX11: v_rndne_f16_e32 v5, ttmp15 ; encoding: [0x7b,0xbc,0x0a,0x7e]
3065+
v_rndne_f16 v5.l, ttmp15
3066+
// GFX11: v_rndne_f16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xbc,0x0a,0x7e]
30673067

3068-
v_rndne_f16 v5, m0
3069-
// GFX11: v_rndne_f16_e32 v5, m0 ; encoding: [0x7d,0xbc,0x0a,0x7e]
3068+
v_rndne_f16 v5.l, m0
3069+
// GFX11: v_rndne_f16_e32 v5.l, m0 ; encoding: [0x7d,0xbc,0x0a,0x7e]
30703070

3071-
v_rndne_f16 v5, exec_lo
3072-
// GFX11: v_rndne_f16_e32 v5, exec_lo ; encoding: [0x7e,0xbc,0x0a,0x7e]
3071+
v_rndne_f16 v5.l, exec_lo
3072+
// GFX11: v_rndne_f16_e32 v5.l, exec_lo ; encoding: [0x7e,0xbc,0x0a,0x7e]
30733073

3074-
v_rndne_f16 v5, exec_hi
3075-
// GFX11: v_rndne_f16_e32 v5, exec_hi ; encoding: [0x7f,0xbc,0x0a,0x7e]
3074+
v_rndne_f16 v5.l, exec_hi
3075+
// GFX11: v_rndne_f16_e32 v5.l, exec_hi ; encoding: [0x7f,0xbc,0x0a,0x7e]
30763076

3077-
v_rndne_f16 v5, null
3078-
// GFX11: v_rndne_f16_e32 v5, null ; encoding: [0x7c,0xbc,0x0a,0x7e]
3077+
v_rndne_f16 v5.l, null
3078+
// GFX11: v_rndne_f16_e32 v5.l, null ; encoding: [0x7c,0xbc,0x0a,0x7e]
30793079

3080-
v_rndne_f16 v5, -1
3081-
// GFX11: v_rndne_f16_e32 v5, -1 ; encoding: [0xc1,0xbc,0x0a,0x7e]
3080+
v_rndne_f16 v5.l, -1
3081+
// GFX11: v_rndne_f16_e32 v5.l, -1 ; encoding: [0xc1,0xbc,0x0a,0x7e]
30823082

3083-
v_rndne_f16 v5, 0.5
3084-
// GFX11: v_rndne_f16_e32 v5, 0.5 ; encoding: [0xf0,0xbc,0x0a,0x7e]
3083+
v_rndne_f16 v5.l, 0.5
3084+
// GFX11: v_rndne_f16_e32 v5.l, 0.5 ; encoding: [0xf0,0xbc,0x0a,0x7e]
30853085

3086-
v_rndne_f16 v5, src_scc
3087-
// GFX11: v_rndne_f16_e32 v5, src_scc ; encoding: [0xfd,0xbc,0x0a,0x7e]
3086+
v_rndne_f16 v5.l, src_scc
3087+
// GFX11: v_rndne_f16_e32 v5.l, src_scc ; encoding: [0xfd,0xbc,0x0a,0x7e]
30883088

3089-
v_rndne_f16 v127, 0xfe0b
3090-
// GFX11: v_rndne_f16_e32 v127, 0xfe0b ; encoding: [0xff,0xbc,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
3089+
v_rndne_f16 v127.l, 0xfe0b
3090+
// GFX11: v_rndne_f16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xbc,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
3091+
3092+
v_rndne_f16 v5.l, v1.h
3093+
// GFX11: v_rndne_f16_e32 v5.l, v1.h ; encoding: [0x81,0xbd,0x0a,0x7e]
3094+
3095+
v_rndne_f16 v5.l, v127.h
3096+
// GFX11: v_rndne_f16_e32 v5.l, v127.h ; encoding: [0xff,0xbd,0x0a,0x7e]
3097+
3098+
v_rndne_f16 v127.l, 0.5
3099+
// GFX11: v_rndne_f16_e32 v127.l, 0.5 ; encoding: [0xf0,0xbc,0xfe,0x7e]
3100+
3101+
v_rndne_f16 v5.h, src_scc
3102+
// GFX11: v_rndne_f16_e32 v5.h, src_scc ; encoding: [0xfd,0xbc,0x0a,0x7f]
3103+
3104+
v_rndne_f16 v127.h, 0xfe0b
3105+
// GFX11: v_rndne_f16_e32 v127.h, 0xfe0b ; encoding: [0xff,0xbc,0xfe,0x7f,0x0b,0xfe,0x00,0x00]
30913106

30923107
v_rndne_f32 v5, v1
30933108
// GFX11: v_rndne_f32_e32 v5, v1 ; encoding: [0x01,0x47,0x0a,0x7e]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2399,47 +2399,56 @@ v_rcp_iflag_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
23992399
v_rcp_iflag_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
24002400
// GFX11: v_rcp_iflag_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x56,0xfe,0x7f,0xff,0x6f,0x35,0x30]
24012401

2402-
v_rndne_f16 v5, v1 quad_perm:[3,2,1,0]
2403-
// GFX11: v_rndne_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x00,0xff]
2402+
v_rndne_f16 v5.l, v1.l quad_perm:[3,2,1,0]
2403+
// GFX11: v_rndne_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1b,0x00,0xff]
24042404

2405-
v_rndne_f16 v5, v1 quad_perm:[0,1,2,3]
2406-
// GFX11: v_rndne_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0xe4,0x00,0xff]
2405+
v_rndne_f16 v5.l, v1.l quad_perm:[0,1,2,3]
2406+
// GFX11: v_rndne_f16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0xe4,0x00,0xff]
24072407

2408-
v_rndne_f16 v5, v1 row_mirror
2409-
// GFX11: v_rndne_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x40,0x01,0xff]
2408+
v_rndne_f16 v5.l, v1.l row_mirror
2409+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x40,0x01,0xff]
24102410

2411-
v_rndne_f16 v5, v1 row_half_mirror
2412-
// GFX11: v_rndne_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x41,0x01,0xff]
2411+
v_rndne_f16 v5.l, v1.l row_half_mirror
2412+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x41,0x01,0xff]
24132413

2414-
v_rndne_f16 v5, v1 row_shl:1
2415-
// GFX11: v_rndne_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x01,0x01,0xff]
2414+
v_rndne_f16 v5.l, v1.l row_shl:1
2415+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x01,0x01,0xff]
24162416

2417-
v_rndne_f16 v5, v1 row_shl:15
2418-
// GFX11: v_rndne_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x0f,0x01,0xff]
2417+
v_rndne_f16 v5.l, v1.l row_shl:15
2418+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x0f,0x01,0xff]
24192419

2420-
v_rndne_f16 v5, v1 row_shr:1
2421-
// GFX11: v_rndne_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x11,0x01,0xff]
2420+
v_rndne_f16 v5.l, v1.l row_shr:1
2421+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x11,0x01,0xff]
24222422

2423-
v_rndne_f16 v5, v1 row_shr:15
2424-
// GFX11: v_rndne_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1f,0x01,0xff]
2423+
v_rndne_f16 v5.l, v1.l row_shr:15
2424+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x1f,0x01,0xff]
24252425

2426-
v_rndne_f16 v5, v1 row_ror:1
2427-
// GFX11: v_rndne_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x21,0x01,0xff]
2426+
v_rndne_f16 v5.l, v1.l row_ror:1
2427+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x21,0x01,0xff]
24282428

2429-
v_rndne_f16 v5, v1 row_ror:15
2430-
// GFX11: v_rndne_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x2f,0x01,0xff]
2429+
v_rndne_f16 v5.l, v1.l row_ror:15
2430+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x2f,0x01,0xff]
24312431

2432-
v_rndne_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
2433-
// GFX11: v_rndne_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x50,0x01,0xff]
2432+
v_rndne_f16 v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf
2433+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x50,0x01,0xff]
24342434

2435-
v_rndne_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
2436-
// GFX11: v_rndne_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x5f,0x01,0x01]
2435+
v_rndne_f16 v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1
2436+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x5f,0x01,0x01]
24372437

2438-
v_rndne_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
2439-
// GFX11: v_rndne_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x60,0x09,0x13]
2438+
v_rndne_f16 v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
2439+
// GFX11: v_rndne_f16_dpp v5.l, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xbc,0x0a,0x7e,0x01,0x60,0x09,0x13]
24402440

2441-
v_rndne_f16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
2442-
// GFX11: v_rndne_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xbc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
2441+
v_rndne_f16 v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
2442+
// GFX11: v_rndne_f16_dpp v127.l, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xbc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
2443+
2444+
v_rndne_f16 v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
2445+
// GFX11: v_rndne_f16_dpp v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xbc,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
2446+
2447+
v_rndne_f16 v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
2448+
// GFX11: v_rndne_f16_dpp v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xbc,0x0a,0x7f,0x81,0x60,0x09,0x13]
2449+
2450+
v_rndne_f16 v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
2451+
// GFX11: v_rndne_f16_dpp v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xbc,0xfe,0x7f,0xff,0x6f,0x35,0x30]
24432452

24442453
v_rndne_f32 v5, v1 quad_perm:[3,2,1,0]
24452454
// GFX11: v_rndne_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x46,0x0a,0x7e,0x01,0x1b,0x00,0xff]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -593,14 +593,23 @@ v_rcp_iflag_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
593593
v_rcp_iflag_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
594594
// GFX11: v_rcp_iflag_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x56,0xfe,0x7f,0xff,0x00,0x00,0x00]
595595

596-
v_rndne_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
597-
// GFX11: v_rndne_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05]
596+
v_rndne_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0]
597+
// GFX11: v_rndne_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05]
598598

599-
v_rndne_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
600-
// GFX11: v_rndne_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05]
599+
v_rndne_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1
600+
// GFX11: v_rndne_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xbc,0x0a,0x7e,0x01,0x77,0x39,0x05]
601601

602-
v_rndne_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
603-
// GFX11: v_rndne_f16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xbc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
602+
v_rndne_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0]
603+
// GFX11: v_rndne_f16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xbc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
604+
605+
v_rndne_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
606+
// GFX11: v_rndne_f16_dpp v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xbc,0xfe,0x7e,0x7f,0x77,0x39,0x05]
607+
608+
v_rndne_f16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1
609+
// GFX11: v_rndne_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xbc,0x0a,0x7f,0x81,0x77,0x39,0x05]
610+
611+
v_rndne_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
612+
// GFX11: v_rndne_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xbc,0xfe,0x7f,0xff,0x00,0x00,0x00]
604613

605614
v_rndne_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
606615
// GFX11: v_rndne_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x46,0x0a,0x7e,0x01,0x77,0x39,0x05]

llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,12 @@ v_rcp_f16_e32 v5.l, v199.l quad_perm:[3,2,1,0]
833833
v_rndne_f16_e32 v128, 0xfe0b
834834
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
835835

836+
v_rndne_f16_e32 v128.h, 0xfe0b
837+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
838+
839+
v_rndne_f16_e32 v128.l, 0xfe0b
840+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
841+
836842
v_rndne_f16_e32 v255, v1
837843
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
838844

@@ -842,6 +848,24 @@ v_rndne_f16_e32 v255, v1 dpp8:[7,6,5,4,3,2,1,0]
842848
v_rndne_f16_e32 v255, v1 quad_perm:[3,2,1,0]
843849
// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
844850

851+
v_rndne_f16_e32 v255.h, v1.h
852+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
853+
854+
v_rndne_f16_e32 v255.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
855+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
856+
857+
v_rndne_f16_e32 v255.h, v1.h quad_perm:[3,2,1,0]
858+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
859+
860+
v_rndne_f16_e32 v255.l, v1.l
861+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
862+
863+
v_rndne_f16_e32 v255.l, v1.l dpp8:[7,6,5,4,3,2,1,0]
864+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
865+
866+
v_rndne_f16_e32 v255.l, v1.l quad_perm:[3,2,1,0]
867+
// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction
868+
845869
v_rndne_f16_e32 v5, v199
846870
// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
847871

@@ -851,6 +875,24 @@ v_rndne_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
851875
v_rndne_f16_e32 v5, v199 quad_perm:[3,2,1,0]
852876
// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
853877

878+
v_rndne_f16_e32 v5.h, v199.h
879+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
880+
881+
v_rndne_f16_e32 v5.h, v199.h dpp8:[7,6,5,4,3,2,1,0]
882+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
883+
884+
v_rndne_f16_e32 v5.h, v199.h quad_perm:[3,2,1,0]
885+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
886+
887+
v_rndne_f16_e32 v5.l, v199.l
888+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
889+
890+
v_rndne_f16_e32 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0]
891+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
892+
893+
v_rndne_f16_e32 v5.l, v199.l quad_perm:[3,2,1,0]
894+
// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction
895+
854896
v_rsq_f16_e32 v128.h, 0xfe0b
855897
// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction
856898

0 commit comments

Comments
 (0)