Skip to content

Commit 55696db

Browse files
committed
AMDGPU: Expand raw ptr buffer atomic fadd tests
Previously, these tests only checked the no-return versions on gfx908.
1 parent 907e739 commit 55696db

File tree

3 files changed

+339
-62
lines changed

3 files changed

+339
-62
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll

Lines changed: 0 additions & 62 deletions
This file was deleted.
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
3+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
4+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
5+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
6+
7+
; Calls llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32 with a variable %voffset and
; a final immediate operand of 24; the result (%ret) is never used.
; The assertions below expect a buffer_atomic_add_f32 with "offen" on every
; target, differing only in the trailing modifier: none on gfx908, "scc" on
; gfx90a, "sc1" on gfx940 and "scope:SCOPE_SYS" on gfx1200.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
8+
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
9+
; GFX908: ; %bb.0:
10+
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; GFX908-NEXT: buffer_atomic_add_f32 v0, v1, s[4:7], s8 offen
12+
; GFX908-NEXT: s_waitcnt vmcnt(0)
13+
; GFX908-NEXT: s_setpc_b64 s[30:31]
14+
;
15+
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
16+
; GFX90A: ; %bb.0:
17+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18+
; GFX90A-NEXT: buffer_atomic_add_f32 v0, v1, s[4:7], s8 offen scc
19+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
20+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
21+
;
22+
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
23+
; GFX940: ; %bb.0:
24+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25+
; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s4 offen sc1
26+
; GFX940-NEXT: s_waitcnt vmcnt(0)
27+
; GFX940-NEXT: s_setpc_b64 s[30:31]
28+
;
29+
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
30+
; GFX12: ; %bb.0:
31+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
32+
; GFX12-NEXT: s_wait_expcnt 0x0
33+
; GFX12-NEXT: s_wait_samplecnt 0x0
34+
; GFX12-NEXT: s_wait_bvhcnt 0x0
35+
; GFX12-NEXT: s_wait_kmcnt 0x0
36+
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s4 offen scope:SCOPE_SYS
37+
; GFX12-NEXT: s_setpc_b64 s[30:31]
38+
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 24)
39+
ret void
40+
}
41+
42+
; Calls llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32 with a constant 0 in the
; voffset position and a final immediate operand of 0; the result (%ret) is
; never used. The assertions below expect a buffer_atomic_add_f32 using "off"
; in place of an offset VGPR and no trailing modifier on any of the four
; targets.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) #0 {
43+
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
44+
; GFX908: ; %bb.0:
45+
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46+
; GFX908-NEXT: buffer_atomic_add_f32 v0, off, s[4:7], s8
47+
; GFX908-NEXT: s_waitcnt vmcnt(0)
48+
; GFX908-NEXT: s_setpc_b64 s[30:31]
49+
;
50+
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
51+
; GFX90A: ; %bb.0:
52+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53+
; GFX90A-NEXT: buffer_atomic_add_f32 v0, off, s[4:7], s8
54+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
55+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
56+
;
57+
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
58+
; GFX940: ; %bb.0:
59+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60+
; GFX940-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s4
61+
; GFX940-NEXT: s_waitcnt vmcnt(0)
62+
; GFX940-NEXT: s_setpc_b64 s[30:31]
63+
;
64+
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
65+
; GFX12: ; %bb.0:
66+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
67+
; GFX12-NEXT: s_wait_expcnt 0x0
68+
; GFX12-NEXT: s_wait_samplecnt 0x0
69+
; GFX12-NEXT: s_wait_bvhcnt 0x0
70+
; GFX12-NEXT: s_wait_kmcnt 0x0
71+
; GFX12-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s4
72+
; GFX12-NEXT: s_setpc_b64 s[30:31]
73+
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
74+
ret void
75+
}
76+
77+
; Calls llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16 with a variable %voffset
; and a final immediate operand of 0; the result (%ret) is never used.
; The assertions below expect a buffer_atomic_pk_add_f16 with "offen" and no
; trailing modifier on all four targets.
define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
78+
; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
79+
; GFX908: ; %bb.0:
80+
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81+
; GFX908-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[4:7], s8 offen
82+
; GFX908-NEXT: s_waitcnt vmcnt(0)
83+
; GFX908-NEXT: s_setpc_b64 s[30:31]
84+
;
85+
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
86+
; GFX90A: ; %bb.0:
87+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+
; GFX90A-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[4:7], s8 offen
89+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
90+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
91+
;
92+
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
93+
; GFX940: ; %bb.0:
94+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95+
; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
96+
; GFX940-NEXT: s_waitcnt vmcnt(0)
97+
; GFX940-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
100+
; GFX12: ; %bb.0:
101+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
102+
; GFX12-NEXT: s_wait_expcnt 0x0
103+
; GFX12-NEXT: s_wait_samplecnt 0x0
104+
; GFX12-NEXT: s_wait_bvhcnt 0x0
105+
; GFX12-NEXT: s_wait_kmcnt 0x0
106+
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
107+
; GFX12-NEXT: s_setpc_b64 s[30:31]
108+
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
109+
ret void
110+
}
111+
112+
; Calls llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16 with the constant 92 in the
; voffset position and a final immediate operand of 0; the result (%ret) is
; never used. The assertions below expect a buffer_atomic_pk_add_f16 using
; "off" in place of an offset VGPR plus "offset:92" on all four targets.
; NOTE(review): the function name says "0_voffset" but the call passes 92, and
; the %voffset parameter is declared yet unused - presumably intentional to
; exercise the immediate offset field; confirm before renaming or removing.
define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
113+
; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
114+
; GFX908: ; %bb.0:
115+
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116+
; GFX908-NEXT: buffer_atomic_pk_add_f16 v0, off, s[4:7], s8 offset:92
117+
; GFX908-NEXT: s_waitcnt vmcnt(0)
118+
; GFX908-NEXT: s_setpc_b64 s[30:31]
119+
;
120+
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
121+
; GFX90A: ; %bb.0:
122+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123+
; GFX90A-NEXT: buffer_atomic_pk_add_f16 v0, off, s[4:7], s8 offset:92
124+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
125+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
126+
;
127+
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
128+
; GFX940: ; %bb.0:
129+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130+
; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
131+
; GFX940-NEXT: s_waitcnt vmcnt(0)
132+
; GFX940-NEXT: s_setpc_b64 s[30:31]
133+
;
134+
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
135+
; GFX12: ; %bb.0:
136+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
137+
; GFX12-NEXT: s_wait_expcnt 0x0
138+
; GFX12-NEXT: s_wait_samplecnt 0x0
139+
; GFX12-NEXT: s_wait_bvhcnt 0x0
140+
; GFX12-NEXT: s_wait_kmcnt 0x0
141+
; GFX12-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
142+
; GFX12-NEXT: s_setpc_b64 s[30:31]
143+
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 92, i32 %soffset, i32 0)
144+
ret void
145+
}
146+
147+
; Calls llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32 with a variable %voffset and
; a final immediate operand of 2; the result (%ret) is never used.
; The assertions below expect a buffer_atomic_add_f32 with "offen" on every
; target, followed by a target-specific modifier: "slc" on gfx908 and gfx90a,
; "nt" on gfx940 and "th:TH_ATOMIC_NT" on gfx1200.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
148+
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
149+
; GFX908: ; %bb.0:
150+
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151+
; GFX908-NEXT: buffer_atomic_add_f32 v0, v1, s[4:7], s8 offen slc
152+
; GFX908-NEXT: s_waitcnt vmcnt(0)
153+
; GFX908-NEXT: s_setpc_b64 s[30:31]
154+
;
155+
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
156+
; GFX90A: ; %bb.0:
157+
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158+
; GFX90A-NEXT: buffer_atomic_add_f32 v0, v1, s[4:7], s8 offen slc
159+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
160+
; GFX90A-NEXT: s_setpc_b64 s[30:31]
161+
;
162+
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
163+
; GFX940: ; %bb.0:
164+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165+
; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s4 offen nt
166+
; GFX940-NEXT: s_waitcnt vmcnt(0)
167+
; GFX940-NEXT: s_setpc_b64 s[30:31]
168+
;
169+
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
170+
; GFX12: ; %bb.0:
171+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
172+
; GFX12-NEXT: s_wait_expcnt 0x0
173+
; GFX12-NEXT: s_wait_samplecnt 0x0
174+
; GFX12-NEXT: s_wait_bvhcnt 0x0
175+
; GFX12-NEXT: s_wait_kmcnt 0x0
176+
; GFX12-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT
177+
; GFX12-NEXT: s_setpc_b64 s[30:31]
178+
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
179+
ret void
180+
}
181+
182+
; Declarations of the two intrinsic overloads called by the test functions
; (float and <2 x half>); in both, the last i32 operand must be an immediate.
; Attribute group #0 (nounwind) is the one referenced by each test function.
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
183+
declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
184+
185+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)