Skip to content

Commit 0074e6b

Browse files
committed
[AMDGPU] Add codegen-prepare-addrspacecast-non-null
1 parent 6c39fa9 commit 0074e6b

File tree

1 file changed

+349
-0
lines changed

1 file changed

+349
-0
lines changed
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM
5+
6+
; Tests that we can avoid nullptr checks for addrspacecasts from/to priv/local.
7+
;
8+
; Whenever a testcase is successful, we should see the addrspacecast replaced with the intrinsic
9+
; and the resulting code should have no select/cndmask null check for the pointer.
10+
11+
define void @local_to_flat_nonnull_arg(ptr addrspace(3) nonnull %ptr) {
12+
; OPT-LABEL: define void @local_to_flat_nonnull_arg(
13+
; OPT-SAME: ptr addrspace(3) nonnull [[PTR:%.*]]) {
14+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
15+
; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
16+
; OPT-NEXT: ret void
17+
;
18+
; DAGISEL-ASM-LABEL: local_to_flat_nonnull_arg:
19+
; DAGISEL-ASM: ; %bb.0:
20+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21+
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_shared_base
22+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
23+
; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
24+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
25+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
26+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
27+
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
28+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
29+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
30+
; GISEL-ASM-LABEL: local_to_flat_nonnull_arg:
31+
; GISEL-ASM: ; %bb.0:
32+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33+
; GISEL-ASM-NEXT: s_mov_b64 s[4:5], src_shared_base
34+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
35+
; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
36+
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
37+
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
38+
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
39+
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
40+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
41+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
42+
; local(3) -> flat cast of a `nonnull` argument. Per the check lines above,
; at this commit the cast is still a plain addrspacecast after the pass and
; both selectors keep the compare-against--1 / v_cndmask sequence, i.e. the
; null check is not yet elided for this case.
%x = addrspacecast ptr addrspace(3) %ptr to ptr
43+
store volatile i32 7, ptr %x
44+
ret void
45+
}
46+
47+
define void @private_to_flat_nonnull_arg(ptr addrspace(5) nonnull %ptr) {
48+
; OPT-LABEL: define void @private_to_flat_nonnull_arg(
49+
; OPT-SAME: ptr addrspace(5) nonnull [[PTR:%.*]]) {
50+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
51+
; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
52+
; OPT-NEXT: ret void
53+
;
54+
; DAGISEL-ASM-LABEL: private_to_flat_nonnull_arg:
55+
; DAGISEL-ASM: ; %bb.0:
56+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57+
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
58+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
59+
; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
60+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
61+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
62+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
63+
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
64+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
65+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
66+
; GISEL-ASM-LABEL: private_to_flat_nonnull_arg:
67+
; GISEL-ASM: ; %bb.0:
68+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69+
; GISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
70+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
71+
; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
72+
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
73+
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
74+
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
75+
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
76+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
77+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
78+
; private(5) -> flat cast of a `nonnull` argument. Same shape as the local
; case above: the checks show the addrspacecast survives the pass unchanged
; and both selectors still emit the -1 compare plus v_cndmask null check.
%x = addrspacecast ptr addrspace(5) %ptr to ptr
79+
store volatile i32 7, ptr %x
80+
ret void
81+
}
82+
83+
define void @flat_to_local_nonnull_arg(ptr nonnull %ptr) {
84+
; OPT-LABEL: define void @flat_to_local_nonnull_arg(
85+
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
86+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
87+
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) [[X]], align 4
88+
; OPT-NEXT: ret void
89+
;
90+
; ASM-LABEL: flat_to_local_nonnull_arg:
91+
; ASM: ; %bb.0:
92+
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93+
; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
94+
; ASM-NEXT: v_mov_b32_e32 v1, 7
95+
; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
96+
; ASM-NEXT: ds_write_b32 v0, v1
97+
; ASM-NEXT: s_waitcnt lgkmcnt(0)
98+
; ASM-NEXT: s_setpc_b64 s[30:31]
99+
; flat -> local(3) cast of a `nonnull` flat pointer. The shared ASM checks
; (both selectors agree here) still show the compare-against-0 and the
; cndmask that substitutes -1 before the ds_write, so the check remains.
%x = addrspacecast ptr %ptr to ptr addrspace(3)
100+
store volatile i32 7, ptr addrspace(3) %x
101+
ret void
102+
}
103+
104+
define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
105+
; OPT-LABEL: define void @flat_to_private_nonnull_arg(
106+
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
107+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
108+
; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[X]], align 4
109+
; OPT-NEXT: ret void
110+
;
111+
; ASM-LABEL: flat_to_private_nonnull_arg:
112+
; ASM: ; %bb.0:
113+
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114+
; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
115+
; ASM-NEXT: v_mov_b32_e32 v1, 7
116+
; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
117+
; ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
118+
; ASM-NEXT: s_waitcnt vmcnt(0)
119+
; ASM-NEXT: s_setpc_b64 s[30:31]
120+
; flat -> private(5) cast of a `nonnull` flat pointer. Mirrors the local
; case above: compare-against-0 plus the cndmask-to--1 still precede the
; buffer_store in the shared ASM checks.
%x = addrspacecast ptr %ptr to ptr addrspace(5)
121+
store volatile i32 7, ptr addrspace(5) %x
122+
ret void
123+
}
124+
125+
define void @private_alloca_to_flat(ptr %ptr) {
126+
; OPT-LABEL: define void @private_alloca_to_flat(
127+
; OPT-SAME: ptr [[PTR:%.*]]) {
128+
; OPT-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
129+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
130+
; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
131+
; OPT-NEXT: ret void
132+
;
133+
; DAGISEL-ASM-LABEL: private_alloca_to_flat:
134+
; DAGISEL-ASM: ; %bb.0:
135+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136+
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
137+
; DAGISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
138+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
139+
; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
140+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
141+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
142+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
143+
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
144+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
145+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
146+
; GISEL-ASM-LABEL: private_alloca_to_flat:
147+
; GISEL-ASM: ; %bb.0:
148+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149+
; GISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
150+
; GISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
151+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
152+
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
153+
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
154+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
155+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
156+
; Cast of an alloca (trivially non-null). Note the asymmetry in the checks
; above: the GlobalISel output has no compare/cndmask at all, while the DAG
; path still emits the full null-check sequence.
%alloca = alloca i8, addrspace(5)
157+
%x = addrspacecast ptr addrspace(5) %alloca to ptr
158+
store volatile i32 7, ptr %x
159+
ret void
160+
}
161+
162+
; NOTE(review): @lds is not referenced by any function in this file —
; presumably reserved for follow-up test cases; confirm before removing.
@lds = internal unnamed_addr addrspace(3) global i8 undef, align 4
163+
164+
define void @knownbits_on_flat_to_priv(ptr %ptr) {
165+
; OPT-LABEL: define void @knownbits_on_flat_to_priv(
166+
; OPT-SAME: ptr [[PTR:%.*]]) {
167+
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr [[PTR]] to i64
168+
; OPT-NEXT: [[PTR_OR:%.*]] = or i64 [[PTR_INT]], 15
169+
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i64 [[PTR_OR]] to ptr
170+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr [[KB_PTR]] to ptr addrspace(5)
171+
; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[X]], align 4
172+
; OPT-NEXT: ret void
173+
;
174+
; DAGISEL-ASM-LABEL: knownbits_on_flat_to_priv:
175+
; DAGISEL-ASM: ; %bb.0:
176+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177+
; DAGISEL-ASM-NEXT: v_or_b32_e32 v0, 15, v0
178+
; DAGISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
179+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
180+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
181+
; DAGISEL-ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
182+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
183+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
184+
; GISEL-ASM-LABEL: knownbits_on_flat_to_priv:
185+
; GISEL-ASM: ; %bb.0:
186+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187+
; GISEL-ASM-NEXT: v_or_b32_e32 v0, 15, v0
188+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
189+
; GISEL-ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
190+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
191+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
192+
; Or-ing in 15 makes the flat pointer provably nonzero via known bits. Per
; the checks above, the GlobalISel output stores directly with no
; compare/cndmask, while the DAG path still emits the null check.
%ptr.int = ptrtoint ptr %ptr to i64
193+
%ptr.or = or i64 %ptr.int, 15 ; set some low bits
194+
%kb.ptr = inttoptr i64 %ptr.or to ptr
195+
%x = addrspacecast ptr %kb.ptr to ptr addrspace(5)
196+
store volatile i32 7, ptr addrspace(5) %x
197+
ret void
198+
}
199+
200+
define void @knownbits_on_priv_to_flat(ptr addrspace(5) %ptr) {
201+
; OPT-LABEL: define void @knownbits_on_priv_to_flat(
202+
; OPT-SAME: ptr addrspace(5) [[PTR:%.*]]) {
203+
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
204+
; OPT-NEXT: [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
205+
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
206+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(5) [[KB_PTR]] to ptr
207+
; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
208+
; OPT-NEXT: ret void
209+
;
210+
; DAGISEL-ASM-LABEL: knownbits_on_priv_to_flat:
211+
; DAGISEL-ASM: ; %bb.0:
212+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213+
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v0
214+
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
215+
; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
216+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
217+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
218+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
219+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
220+
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
221+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
222+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
223+
; GISEL-ASM-LABEL: knownbits_on_priv_to_flat:
224+
; GISEL-ASM: ; %bb.0:
225+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226+
; GISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
227+
; GISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v0
228+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
229+
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
230+
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
231+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
232+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
233+
; Masking with 0xffff zeroes the HIGH bits, so known bits prove the value
; cannot equal -1 (the value the null check compares against). The GlobalISel
; checks above show no compare/cndmask; the DAG path still emits them.
%ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
234+
%ptr.or = and i32 %ptr.int, 65535 ; clears the high 16 bits, so the value cannot be -1
235+
%kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
236+
%x = addrspacecast ptr addrspace(5) %kb.ptr to ptr
237+
store volatile i32 7, ptr %x
238+
ret void
239+
}
240+
241+
; This would recurse infinitely; the pass gives up once it notices the cycle.
242+
define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
243+
; OPT-LABEL: define void @recursive_phis(
244+
; OPT-SAME: i1 [[COND:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
245+
; OPT-NEXT: entry:
246+
; OPT-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
247+
; OPT-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]]
248+
; OPT: then:
249+
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
250+
; OPT-NEXT: [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
251+
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
252+
; OPT-NEXT: br label [[FINALLY:%.*]]
253+
; OPT: else:
254+
; OPT-NEXT: [[OTHER_PHI:%.*]] = phi ptr addrspace(5) [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[PHI_PTR:%.*]], [[FINALLY]] ]
255+
; OPT-NEXT: br label [[FINALLY]]
256+
; OPT: finally:
257+
; OPT-NEXT: [[PHI_PTR]] = phi ptr addrspace(5) [ [[KB_PTR]], [[THEN]] ], [ [[OTHER_PHI]], [[ELSE]] ]
258+
; OPT-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(5) [[PHI_PTR]] to ptr
259+
; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
260+
; OPT-NEXT: br i1 [[COND]], label [[ELSE]], label [[END:%.*]]
261+
; OPT: end:
262+
; OPT-NEXT: ret void
263+
;
264+
; DAGISEL-ASM-LABEL: recursive_phis:
265+
; DAGISEL-ASM: ; %bb.0: ; %entry
266+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267+
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 1, v0
268+
; DAGISEL-ASM-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
269+
; DAGISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
270+
; DAGISEL-ASM-NEXT: s_and_saveexec_b64 s[4:5], vcc
271+
; DAGISEL-ASM-NEXT: ; %bb.1: ; %then
272+
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1
273+
; DAGISEL-ASM-NEXT: ; %bb.2: ; %finallyendcf.split
274+
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
275+
; DAGISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
276+
; DAGISEL-ASM-NEXT: s_xor_b64 s[6:7], vcc, -1
277+
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], 0
278+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
279+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
280+
; DAGISEL-ASM-NEXT: .LBB7_3: ; %finally
281+
; DAGISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
282+
; DAGISEL-ASM-NEXT: s_and_b64 s[8:9], exec, s[6:7]
283+
; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
284+
; DAGISEL-ASM-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
285+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
286+
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
287+
; DAGISEL-ASM-NEXT: flat_store_dword v[3:4], v2
288+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
289+
; DAGISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[4:5]
290+
; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
291+
; DAGISEL-ASM-NEXT: ; %bb.4: ; %end
292+
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
293+
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
294+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
295+
;
296+
; GISEL-ASM-LABEL: recursive_phis:
297+
; GISEL-ASM: ; %bb.0: ; %entry
298+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299+
; GISEL-ASM-NEXT: v_and_b32_e32 v0, 1, v0
300+
; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
301+
; GISEL-ASM-NEXT: s_xor_b64 s[4:5], vcc, -1
302+
; GISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
303+
; GISEL-ASM-NEXT: s_and_saveexec_b64 s[6:7], vcc
304+
; GISEL-ASM-NEXT: ; %bb.1: ; %then
305+
; GISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1
306+
; GISEL-ASM-NEXT: ; %bb.2: ; %finallyendcf.split
307+
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
308+
; GISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
309+
; GISEL-ASM-NEXT: s_mov_b64 s[6:7], 0
310+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
311+
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
312+
; GISEL-ASM-NEXT: .LBB7_3: ; %finally
313+
; GISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
314+
; GISEL-ASM-NEXT: s_and_b64 s[8:9], exec, s[4:5]
315+
; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
316+
; GISEL-ASM-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
317+
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc
318+
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
319+
; GISEL-ASM-NEXT: flat_store_dword v[3:4], v2
320+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
321+
; GISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[6:7]
322+
; GISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
323+
; GISEL-ASM-NEXT: ; %bb.4: ; %end
324+
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
325+
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
326+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
327+
; %other.phi and %phi.ptr form a cycle, so chasing the incoming values to
; prove the pointer non-null would never terminate; the checks above show
; the compare/cndmask null check remains inside the loop in both outputs.
entry:
328+
%alloca = alloca i8, addrspace(5)
329+
br i1 %cond, label %then, label %else
330+
331+
then:
332+
%ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
333+
%ptr.or = and i32 %ptr.int, 65535 ; clears the high 16 bits, so the value cannot be -1
334+
%kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
335+
br label %finally
336+
337+
else:
338+
%other.phi = phi ptr addrspace(5) [%alloca, %entry], [%phi.ptr, %finally]
339+
br label %finally
340+
341+
finally:
342+
%phi.ptr = phi ptr addrspace(5) [%kb.ptr, %then], [%other.phi, %else]
343+
%x = addrspacecast ptr addrspace(5) %phi.ptr to ptr
344+
store volatile i32 7, ptr %x
345+
br i1 %cond, label %else, label %end
346+
347+
end:
348+
ret void
349+
}

0 commit comments

Comments
 (0)