Skip to content

Commit 156faff

Browse files
committed
test updates
1 parent: b68fc28; commit: 156faff

File tree

3 files changed: 88 additions (+88) and 0 deletions (-0).

3 files changed: 88 additions (+88) and 0 deletions (-0).

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
55
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
66
; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
7+
; RUN: llc < %s -march=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s
78

89
define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
910
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
@@ -29,6 +30,12 @@ define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
2930
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
3031
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
3132
; GFX11-PACKED-NEXT: ; return to shader part epilog
33+
;
34+
; GFX12-PACKED-LABEL: tbuffer_load_d16_x:
35+
; GFX12-PACKED: ; %bb.0: ; %main_body
36+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_32_FLOAT]
37+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
38+
; GFX12-PACKED-NEXT: ; return to shader part epilog
3239
main_body:
3340
%data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
3441
ret half %data
@@ -62,6 +69,13 @@ define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
6269
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
6370
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
6471
; GFX11-PACKED-NEXT: ; return to shader part epilog
72+
;
73+
; GFX12-PACKED-LABEL: tbuffer_load_d16_xy:
74+
; GFX12-PACKED: ; %bb.0: ; %main_body
75+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xy v0, off, s[0:3], null format:[BUF_FMT_32_FLOAT]
76+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
77+
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
78+
; GFX12-PACKED-NEXT: ; return to shader part epilog
6579
main_body:
6680
%data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
6781
%elt = extractelement <2 x half> %data, i32 1
@@ -96,6 +110,13 @@ define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
96110
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
97111
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
98112
; GFX11-PACKED-NEXT: ; return to shader part epilog
113+
;
114+
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
115+
; GFX12-PACKED: ; %bb.0: ; %main_body
116+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
117+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
118+
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, v1
119+
; GFX12-PACKED-NEXT: ; return to shader part epilog
99120
main_body:
100121
%data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
101122
%elt = extractelement <3 x half> %data, i32 2
@@ -130,6 +151,13 @@ define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
130151
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
131152
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
132153
; GFX11-PACKED-NEXT: ; return to shader part epilog
154+
;
155+
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw:
156+
; GFX12-PACKED: ; %bb.0: ; %main_body
157+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
158+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
159+
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
160+
; GFX12-PACKED-NEXT: ; return to shader part epilog
133161
main_body:
134162
%data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
135163
%elt = extractelement <4 x half> %data, i32 3

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
; RUN: llc -mcpu=gfx900 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
44
; RUN: llc -mcpu=gfx810 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
55
; RUN: llc -mcpu=gfx1100 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
6+
; RUN: llc -mcpu=gfx1200 -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
7+
68
@esgs_ring = external addrspace(3) global [0 x i32], align 65536
79

810
define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
@@ -111,6 +113,32 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
111113
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v5
112114
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v1, 0xffff, v6
113115
; GFX11-NEXT: ds_store_2addr_b32 v2, v0, v1 offset0:7 offset1:8
116+
;
117+
; GFX12-LABEL: main:
118+
; GFX12: ; %bb.0: ; %bb
119+
; GFX12-NEXT: s_mov_b32 s1, exec_lo
120+
; GFX12-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
121+
; GFX12-NEXT: v_readfirstlane_b32 s4, v0
122+
; GFX12-NEXT: v_readfirstlane_b32 s5, v1
123+
; GFX12-NEXT: v_readfirstlane_b32 s6, v2
124+
; GFX12-NEXT: v_readfirstlane_b32 s7, v3
125+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
126+
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1]
127+
; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3]
128+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
129+
; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0
130+
; GFX12-NEXT: s_and_saveexec_b32 s0, s0
131+
; GFX12-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[4:7], null idxen
132+
; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
133+
; GFX12-NEXT: ; implicit-def: $vgpr4
134+
; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0
135+
; GFX12-NEXT: s_cbranch_execnz .LBB0_1
136+
; GFX12-NEXT: ; %bb.2:
137+
; GFX12-NEXT: s_mov_b32 exec_lo, s1
138+
; GFX12-NEXT: s_waitcnt vmcnt(0)
139+
; GFX12-NEXT: v_lshrrev_b32_e32 v0, 16, v5
140+
; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v1, 0xffff, v6
141+
; GFX12-NEXT: ds_store_2addr_b32 v2, v0, v1 offset0:7 offset1:8
114142
bb:
115143
%i = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 undef)
116144
%i2 = call nsz arcp <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32> %arg, i32 %arg1, i32 0, i32 0, i32 0)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
55
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
66
; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
7+
; RUN: llc < %s -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s
78

89
define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
910
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
@@ -33,6 +34,13 @@ define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
3334
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
3435
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
3536
; GFX11-PACKED-NEXT: ; return to shader part epilog
37+
;
38+
; GFX12-PACKED-LABEL: tbuffer_load_d16_x:
39+
; GFX12-PACKED: ; %bb.0: ; %main_body
40+
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
41+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
42+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
43+
; GFX12-PACKED-NEXT: ; return to shader part epilog
3644
main_body:
3745
%data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
3846
ret half %data
@@ -70,6 +78,14 @@ define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
7078
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
7179
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
7280
; GFX11-PACKED-NEXT: ; return to shader part epilog
81+
;
82+
; GFX12-PACKED-LABEL: tbuffer_load_d16_xy:
83+
; GFX12-PACKED: ; %bb.0: ; %main_body
84+
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
85+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xy v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
86+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
87+
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
88+
; GFX12-PACKED-NEXT: ; return to shader part epilog
7389
main_body:
7490
%data = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
7591
%elt = extractelement <2 x half> %data, i32 1
@@ -108,6 +124,14 @@ define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
108124
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
109125
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
110126
; GFX11-PACKED-NEXT: ; return to shader part epilog
127+
;
128+
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
129+
; GFX12-PACKED: ; %bb.0: ; %main_body
130+
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
131+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
132+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
133+
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, v1
134+
; GFX12-PACKED-NEXT: ; return to shader part epilog
111135
main_body:
112136
%data = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
113137
%elt = extractelement <3 x half> %data, i32 2
@@ -146,6 +170,14 @@ define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
146170
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
147171
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
148172
; GFX11-PACKED-NEXT: ; return to shader part epilog
173+
;
174+
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw:
175+
; GFX12-PACKED: ; %bb.0: ; %main_body
176+
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
177+
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
178+
; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0)
179+
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
180+
; GFX12-PACKED-NEXT: ; return to shader part epilog
149181
main_body:
150182
%data = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
151183
%elt = extractelement <4 x half> %data, i32 3

0 commit comments

Comments
 (0)