Skip to content

Commit 0b77b19

Browse files
authored
[AMDGPU] Add test to show s_cselect generation from uniform select (#79384)
1 parent 7ff4887 commit 0b77b19

File tree

1 file changed

+219
-0
lines changed

1 file changed

+219
-0
lines changed
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A %s
3+
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940 %s
4+
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX1030 %s
5+
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100 %s
6+
7+
; Kernel that repeatedly extracts from and inserts into two loop-carried
; <4 x i32> values using a lane index taken from the kernel argument %q; the
; kernel argument %p is OR'ed into the extracted lane of one of them.
; In the recorded output below, the dynamically indexed lane accesses show up as
; chains of s_cmp_eq_u32 / s_cselect_b32 (lane numbers 0..3 compared against s1).
; The assertions are autogenerated (see the NOTE at the top of this file):
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
define amdgpu_kernel void @test_insert_extract(i32 %p, i32 %q) {
8+
; GFX90A-LABEL: test_insert_extract:
9+
; GFX90A: ; %bb.0: ; %entry
10+
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11+
; GFX90A-NEXT: s_mov_b32 s2, 0
12+
; GFX90A-NEXT: s_and_b64 vcc, exec, -1
13+
; GFX90A-NEXT: s_mov_b32 s3, 0
14+
; GFX90A-NEXT: s_mov_b32 s4, 0
15+
; GFX90A-NEXT: s_mov_b32 s5, 0
16+
; GFX90A-NEXT: s_mov_b32 s6, 0
17+
; GFX90A-NEXT: .LBB0_1: ; %for.body
18+
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
19+
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
20+
; GFX90A-NEXT: s_cmp_eq_u32 s1, 1
21+
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
22+
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
23+
; GFX90A-NEXT: s_cselect_b32 s7, s4, s3
24+
; GFX90A-NEXT: s_cmp_eq_u32 s1, 2
25+
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
26+
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
27+
; GFX90A-NEXT: s_cselect_b32 s7, s5, s7
28+
; GFX90A-NEXT: s_cmp_eq_u32 s1, 3
29+
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
30+
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
31+
; GFX90A-NEXT: s_cselect_b32 s7, s6, s7
32+
; GFX90A-NEXT: s_or_b32 s7, s7, s0
33+
; GFX90A-NEXT: s_cmp_eq_u32 s1, 1
34+
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
35+
; GFX90A-NEXT: s_and_b64 s[10:11], s[8:9], exec
36+
; GFX90A-NEXT: s_cselect_b32 s4, s7, s4
37+
; GFX90A-NEXT: s_cmp_eq_u32 s1, 3
38+
; GFX90A-NEXT: s_cselect_b64 s[10:11], -1, 0
39+
; GFX90A-NEXT: s_and_b64 s[12:13], s[10:11], exec
40+
; GFX90A-NEXT: s_cselect_b32 s6, s7, s6
41+
; GFX90A-NEXT: s_cmp_eq_u32 s1, 2
42+
; GFX90A-NEXT: s_cselect_b64 s[12:13], -1, 0
43+
; GFX90A-NEXT: s_and_b64 s[14:15], s[12:13], exec
44+
; GFX90A-NEXT: s_cselect_b32 s5, s7, s5
45+
; GFX90A-NEXT: s_cmp_eq_u32 s1, 0
46+
; GFX90A-NEXT: s_cselect_b32 s3, s7, s3
47+
; GFX90A-NEXT: s_or_b64 s[8:9], s[12:13], s[8:9]
48+
; GFX90A-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
49+
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
50+
; GFX90A-NEXT: s_cselect_b32 s2, 0, s2
51+
; GFX90A-NEXT: s_mov_b64 vcc, vcc
52+
; GFX90A-NEXT: s_cbranch_vccnz .LBB0_1
53+
; GFX90A-NEXT: ; %bb.2: ; %DummyReturnBlock
54+
; GFX90A-NEXT: s_endpgm
55+
;
56+
; GFX940-LABEL: test_insert_extract:
57+
; GFX940: ; %bb.0: ; %entry
58+
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
59+
; GFX940-NEXT: s_mov_b32 s2, 0
60+
; GFX940-NEXT: s_and_b64 vcc, exec, -1
61+
; GFX940-NEXT: s_mov_b32 s3, 0
62+
; GFX940-NEXT: s_mov_b32 s4, 0
63+
; GFX940-NEXT: s_mov_b32 s5, 0
64+
; GFX940-NEXT: s_mov_b32 s6, 0
65+
; GFX940-NEXT: .LBB0_1: ; %for.body
66+
; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
67+
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
68+
; GFX940-NEXT: s_cmp_eq_u32 s1, 1
69+
; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0
70+
; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
71+
; GFX940-NEXT: s_cselect_b32 s7, s4, s3
72+
; GFX940-NEXT: s_cmp_eq_u32 s1, 2
73+
; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0
74+
; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
75+
; GFX940-NEXT: s_cselect_b32 s7, s5, s7
76+
; GFX940-NEXT: s_cmp_eq_u32 s1, 3
77+
; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0
78+
; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
79+
; GFX940-NEXT: s_cselect_b32 s7, s6, s7
80+
; GFX940-NEXT: s_or_b32 s7, s7, s0
81+
; GFX940-NEXT: s_cmp_eq_u32 s1, 1
82+
; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0
83+
; GFX940-NEXT: s_and_b64 s[10:11], s[8:9], exec
84+
; GFX940-NEXT: s_cselect_b32 s4, s7, s4
85+
; GFX940-NEXT: s_cmp_eq_u32 s1, 3
86+
; GFX940-NEXT: s_cselect_b64 s[10:11], -1, 0
87+
; GFX940-NEXT: s_and_b64 s[12:13], s[10:11], exec
88+
; GFX940-NEXT: s_cselect_b32 s6, s7, s6
89+
; GFX940-NEXT: s_cmp_eq_u32 s1, 2
90+
; GFX940-NEXT: s_cselect_b64 s[12:13], -1, 0
91+
; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec
92+
; GFX940-NEXT: s_cselect_b32 s5, s7, s5
93+
; GFX940-NEXT: s_cmp_eq_u32 s1, 0
94+
; GFX940-NEXT: s_cselect_b32 s3, s7, s3
95+
; GFX940-NEXT: s_or_b64 s[8:9], s[12:13], s[8:9]
96+
; GFX940-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
97+
; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec
98+
; GFX940-NEXT: s_cselect_b32 s2, 0, s2
99+
; GFX940-NEXT: s_mov_b64 vcc, vcc
100+
; GFX940-NEXT: s_cbranch_vccnz .LBB0_1
101+
; GFX940-NEXT: ; %bb.2: ; %DummyReturnBlock
102+
; GFX940-NEXT: s_endpgm
103+
;
104+
; GFX1030-LABEL: test_insert_extract:
105+
; GFX1030: ; %bb.0: ; %entry
106+
; GFX1030-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
107+
; GFX1030-NEXT: s_mov_b32 s2, 0
108+
; GFX1030-NEXT: s_mov_b32 s3, 0
109+
; GFX1030-NEXT: s_mov_b32 s4, 0
110+
; GFX1030-NEXT: s_mov_b32 s5, 0
111+
; GFX1030-NEXT: s_mov_b32 s6, 0
112+
; GFX1030-NEXT: s_mov_b32 vcc_lo, exec_lo
113+
; GFX1030-NEXT: .p2align 6
114+
; GFX1030-NEXT: .LBB0_1: ; %for.body
115+
; GFX1030-NEXT: ; =>This Inner Loop Header: Depth=1
116+
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
117+
; GFX1030-NEXT: s_cmp_eq_u32 s1, 1
118+
; GFX1030-NEXT: s_cselect_b32 s7, -1, 0
119+
; GFX1030-NEXT: s_and_b32 s7, s7, exec_lo
120+
; GFX1030-NEXT: s_cselect_b32 s7, s4, s3
121+
; GFX1030-NEXT: s_cmp_eq_u32 s1, 2
122+
; GFX1030-NEXT: s_cselect_b32 s8, -1, 0
123+
; GFX1030-NEXT: s_and_b32 s8, s8, exec_lo
124+
; GFX1030-NEXT: s_cselect_b32 s7, s5, s7
125+
; GFX1030-NEXT: s_cmp_eq_u32 s1, 3
126+
; GFX1030-NEXT: s_cselect_b32 s8, -1, 0
127+
; GFX1030-NEXT: s_and_b32 s8, s8, exec_lo
128+
; GFX1030-NEXT: s_cselect_b32 s7, s6, s7
129+
; GFX1030-NEXT: s_or_b32 s7, s7, s0
130+
; GFX1030-NEXT: s_cmp_eq_u32 s1, 1
131+
; GFX1030-NEXT: s_cselect_b32 s8, -1, 0
132+
; GFX1030-NEXT: s_and_b32 s9, s8, exec_lo
133+
; GFX1030-NEXT: s_cselect_b32 s4, s7, s4
134+
; GFX1030-NEXT: s_cmp_eq_u32 s1, 3
135+
; GFX1030-NEXT: s_cselect_b32 s9, -1, 0
136+
; GFX1030-NEXT: s_and_b32 s10, s9, exec_lo
137+
; GFX1030-NEXT: s_cselect_b32 s6, s7, s6
138+
; GFX1030-NEXT: s_cmp_eq_u32 s1, 2
139+
; GFX1030-NEXT: s_cselect_b32 s10, -1, 0
140+
; GFX1030-NEXT: s_and_b32 s11, s10, exec_lo
141+
; GFX1030-NEXT: s_cselect_b32 s5, s7, s5
142+
; GFX1030-NEXT: s_cmp_eq_u32 s1, 0
143+
; GFX1030-NEXT: s_cselect_b32 s3, s7, s3
144+
; GFX1030-NEXT: s_or_b32 s7, s10, s8
145+
; GFX1030-NEXT: s_or_b32 s7, s9, s7
146+
; GFX1030-NEXT: s_and_b32 s7, s7, exec_lo
147+
; GFX1030-NEXT: s_cselect_b32 s2, 0, s2
148+
; GFX1030-NEXT: s_cbranch_vccnz .LBB0_1
149+
; GFX1030-NEXT: ; %bb.2: ; %DummyReturnBlock
150+
; GFX1030-NEXT: s_endpgm
151+
;
152+
; GFX1100-LABEL: test_insert_extract:
153+
; GFX1100: ; %bb.0: ; %entry
154+
; GFX1100-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
155+
; GFX1100-NEXT: s_mov_b32 s2, 0
156+
; GFX1100-NEXT: s_mov_b32 s3, 0
157+
; GFX1100-NEXT: s_mov_b32 s4, 0
158+
; GFX1100-NEXT: s_mov_b32 s5, 0
159+
; GFX1100-NEXT: s_mov_b32 s6, 0
160+
; GFX1100-NEXT: s_mov_b32 vcc_lo, exec_lo
161+
; GFX1100-NEXT: .p2align 6
162+
; GFX1100-NEXT: .LBB0_1: ; %for.body
163+
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
164+
; GFX1100-NEXT: s_waitcnt lgkmcnt(0)
165+
; GFX1100-NEXT: s_cmp_eq_u32 s1, 1
166+
; GFX1100-NEXT: s_cselect_b32 s7, -1, 0
167+
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
168+
; GFX1100-NEXT: s_and_b32 s7, s7, exec_lo
169+
; GFX1100-NEXT: s_cselect_b32 s7, s4, s3
170+
; GFX1100-NEXT: s_cmp_eq_u32 s1, 2
171+
; GFX1100-NEXT: s_cselect_b32 s8, -1, 0
172+
; GFX1100-NEXT: s_and_b32 s8, s8, exec_lo
173+
; GFX1100-NEXT: s_cselect_b32 s7, s5, s7
174+
; GFX1100-NEXT: s_cmp_eq_u32 s1, 3
175+
; GFX1100-NEXT: s_cselect_b32 s8, -1, 0
176+
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
177+
; GFX1100-NEXT: s_and_b32 s8, s8, exec_lo
178+
; GFX1100-NEXT: s_cselect_b32 s7, s6, s7
179+
; GFX1100-NEXT: s_or_b32 s7, s7, s0
180+
; GFX1100-NEXT: s_cmp_eq_u32 s1, 1
181+
; GFX1100-NEXT: s_cselect_b32 s8, -1, 0
182+
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
183+
; GFX1100-NEXT: s_and_b32 s9, s8, exec_lo
184+
; GFX1100-NEXT: s_cselect_b32 s4, s7, s4
185+
; GFX1100-NEXT: s_cmp_eq_u32 s1, 3
186+
; GFX1100-NEXT: s_cselect_b32 s9, -1, 0
187+
; GFX1100-NEXT: s_and_b32 s10, s9, exec_lo
188+
; GFX1100-NEXT: s_cselect_b32 s6, s7, s6
189+
; GFX1100-NEXT: s_cmp_eq_u32 s1, 2
190+
; GFX1100-NEXT: s_cselect_b32 s10, -1, 0
191+
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
192+
; GFX1100-NEXT: s_and_b32 s11, s10, exec_lo
193+
; GFX1100-NEXT: s_cselect_b32 s5, s7, s5
194+
; GFX1100-NEXT: s_cmp_eq_u32 s1, 0
195+
; GFX1100-NEXT: s_cselect_b32 s3, s7, s3
196+
; GFX1100-NEXT: s_or_b32 s7, s10, s8
197+
; GFX1100-NEXT: s_or_b32 s7, s9, s7
198+
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
199+
; GFX1100-NEXT: s_and_b32 s7, s7, exec_lo
200+
; GFX1100-NEXT: s_cselect_b32 s2, 0, s2
201+
; GFX1100-NEXT: s_cbranch_vccnz .LBB0_1
202+
; GFX1100-NEXT: ; %bb.2: ; %DummyReturnBlock
203+
; GFX1100-NEXT: s_endpgm
204+
entry:
205+
; Inserting 0 into lane 0 of zeroinitializer leaves %init as an all-zero vector.
%init = insertelement <4 x i32> zeroinitializer, i32 0, i64 0
206+
br label %for.body
207+
208+
; The block's only terminator branches back to itself, so the IR loop has no
; exit edge; the checked output still ends in a %DummyReturnBlock with s_endpgm.
for.body: ; preds = %for.body, %entry
209+
; %x1 is the loop-carried vector updated through %i4 (starts as %init).
%x1 = phi <4 x i32> [ %init, %entry ], [ %i4, %for.body ]
210+
; %x2 is the loop-carried vector updated through %i2 (starts as all zeros).
%x2 = phi <4 x i32> [ zeroinitializer, %entry ], [ %i2, %for.body ]
211+
; Lane index from kernel argument %q; used by every extract/insert below except
; the final insert, which targets constant lane 0.
%idxprom = zext i32 %q to i64
212+
%e1 = extractelement <4 x i32> %x2, i64 %idxprom
213+
; NOTE: despite its name, %add is a bitwise OR of the extracted lane with %p.
%add = or i32 %e1, %p
214+
; Write the OR result back into the same dynamically selected lane of %x2.
%i2 = insertelement <4 x i32> %x2, i32 %add, i64 %idxprom
215+
; Read the dynamically selected lane of %x1 ...
%e3 = extractelement <4 x i32> %x1, i64 %idxprom
216+
; ... and store it into constant lane 0 of %x1.
%i4 = insertelement <4 x i32> %x1, i32 %e3, i64 0
217+
br label %for.body
218+
}
219+

0 commit comments

Comments
 (0)