Skip to content

Commit f16c677

Browse files
committed
[SDAG][tests] add some test cases covering an add-based rotate
1 parent 34fa037 commit f16c677

File tree

4 files changed

+1148
-0
lines changed

4 files changed

+1148
-0
lines changed
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
; No module-level target triple: the AMDGPU target is selected via -mtriple in
; the RUN lines above. (A stray "nvptx64-nvidia-cuda" triple was removed; it
; contradicted the amdgcn RUN lines and the AMDGPU CHECK output below.)
7+
; (x << 7) + (x >> 25): the shift amounts are complementary (7 + 25 = 32) and
; the operands are disjoint, so the add acts as an or and the whole thing is
; rotl(x, 7). Both subtargets should select a single v_alignbit_b32 (rotate
; right by 25 == rotate left by 7).
define i32 @test_simple_rotl(i32 %x) {
; SI-LABEL: test_simple_rotl:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_alignbit_b32 v0, v0, v0, 25
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_simple_rotl:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_alignbit_b32 v0, v0, v0, 25
; VI-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %x, 7
%shr = lshr i32 %x, 25
%add = add i32 %shl, %shr
ret i32 %add
}
24+
25+
; (x >> 7) + (x << 25): complementary constant shifts combined with add, i.e.
; rotr(x, 7). Expected to lower to a single v_alignbit_b32 on both subtargets.
define i32 @test_simple_rotr(i32 %x) {
; SI-LABEL: test_simple_rotr:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_alignbit_b32 v0, v0, v0, 7
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_simple_rotr:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_alignbit_b32 v0, v0, v0, 7
; VI-NEXT: s_setpc_b64 s[30:31]
%shr = lshr i32 %x, 7
%shl = shl i32 %x, 25
%add = add i32 %shr, %shl
ret i32 %add
}
42+
43+
; Variable-amount rotate-left pattern: (x << y) + (x >> (32 - y)). Per the
; current checks this is NOT folded to v_alignbit_b32; it lowers to the
; explicit shl / sub / lshr / add sequence on both subtargets.
define i32 @test_rotl_var(i32 %x, i32 %y) {
; SI-LABEL: test_rotl_var:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v2, v1, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_rotl_var:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_lshlrev_b32_e32 v2, v1, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
%shr = lshr i32 %x, %sub
%add = add i32 %shl, %shr
ret i32 %add
}
67+
68+
; Variable-amount rotate-right pattern: (x >> y) + (x << (32 - y)). Mirror of
; test_rotl_var; likewise lowers to the explicit shift/sub/add sequence rather
; than v_alignbit_b32 per the current checks.
define i32 @test_rotr_var(i32 %x, i32 %y) {
; SI-LABEL: test_rotr_var:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v2, v1, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_rotr_var:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_lshrrev_b32_e32 v2, v1, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
%shl = shl i32 %x, %sub
%add = add i32 %shr, %shl
ret i32 %add
}
92+
93+
; Negated-and-masked amount: (x << y) + (x >> ((-y) & 31)). Named "invalid"
; because this add-based form is not matched as a rotate here; the checks show
; the shifts remain expanded (v_sub from 0, then shl/lshr/add).
; NOTE(review): the value names are swapped relative to the opcodes (%shr
; holds the shl result and %shl the lshr result) — harmless, matches upstream.
define i32 @test_invalid_rotl_var_and(i32 %x, i32 %y) {
; SI-LABEL: test_invalid_rotl_var_and:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v2, v1, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 0, v1
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_invalid_rotl_var_and:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_lshlrev_b32_e32 v2, v1, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 0, v1
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-NEXT: s_setpc_b64 s[30:31]
%shr = shl i32 %x, %y
%sub = sub nsw i32 0, %y
%and = and i32 %sub, 31
%shl = lshr i32 %x, %and
%add = add i32 %shl, %shr
ret i32 %add
}
118+
119+
; Negated-and-masked amount, rotate-right direction: (x >> y) + (x << ((-y) & 31)).
; Like test_invalid_rotl_var_and, this is not matched as a rotate; the checks
; show the expanded lshr / sub-from-0 / shl / add sequence.
define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) {
; SI-LABEL: test_invalid_rotr_var_and:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v2, v1, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 0, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_invalid_rotr_var_and:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_lshrrev_b32_e32 v2, v1, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 0, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%shr = lshr i32 %x, %y
%sub = sub nsw i32 0, %y
%and = and i32 %sub, 31
%shl = shl i32 %x, %and
%add = add i32 %shr, %shl
ret i32 %add
}
144+
145+
; fshl-style pattern with an add: (x0 << y) + ((x1 >> 1) >> (y ^ 31)).
; Not folded to a funnel shift per the current checks: both subtargets emit
; the explicit shl / lshr-by-1 / xor-31 / lshr / add sequence.
define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
; SI-LABEL: test_fshl_special_case:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; SI-NEXT: v_xor_b32_e32 v2, 31, v2
; SI-NEXT: v_lshrrev_b32_e32 v1, v2, v1
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_fshl_special_case:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; VI-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; VI-NEXT: v_xor_b32_e32 v2, 31, v2
; VI-NEXT: v_lshrrev_b32_e32 v1, v2, v1
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
%shl = shl i32 %x0, %y
%srli = lshr i32 %x1, 1
%x = xor i32 %y, 31
%srlo = lshr i32 %srli, %x
%o = add i32 %shl, %srlo
ret i32 %o
}
172+
173+
; fshr-style pattern with an add: (x1 >> y) + ((x0 << 1) << (y ^ 31)).
; Mirror of test_fshl_special_case; likewise stays expanded (lshr / shl-by-1 /
; xor-31 / shl / add) per the current checks.
; NOTE(review): value names (%shl/%srli/%srlo) don't match the opcodes they
; name — kept as-is to match the upstream test text.
define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) {
; SI-LABEL: test_fshr_special_case:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v1, v2, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; SI-NEXT: v_xor_b32_e32 v2, 31, v2
; SI-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_fshr_special_case:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_lshrrev_b32_e32 v1, v2, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; VI-NEXT: v_xor_b32_e32 v2, 31, v2
; VI-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%shl = lshr i32 %x1, %y
%srli = shl i32 %x0, 1
%x = xor i32 %y, 31
%srlo = shl i32 %srli, %x
%o = add i32 %shl, %srlo
ret i32 %o
}
200+
201+
; i64 rotate built from divisions: (i/3 << 60) + i/48. Since 48 = 3*16,
; i/48 == (i/3) >> 4, so this is a rotl of i/3 by 60. The checks show the
; magic-number expansion of udiv-by-3 (0xaaaaaaab / 0xaaaaaaaa multiplies)
; followed by a 64-bit rotate lowered with v_alignbit_b32 plus shift/and/or.
define i64 @test_rotl_udiv_special_case(i64 %i) {
; SI-LABEL: test_rotl_udiv_special_case:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, 0xaaaaaaaa
; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab
; SI-NEXT: v_mul_hi_u32 v2, v0, s4
; SI-NEXT: v_mul_lo_u32 v3, v0, s4
; SI-NEXT: v_mul_hi_u32 v4, v1, s5
; SI-NEXT: v_mul_lo_u32 v5, v1, s5
; SI-NEXT: v_mul_hi_u32 v0, v0, s5
; SI-NEXT: v_mul_hi_u32 v6, v1, s4
; SI-NEXT: v_mul_lo_u32 v1, v1, s4
; SI-NEXT: v_add_i32_e32 v0, vcc, v5, v0
; SI-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
; SI-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; SI-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
; SI-NEXT: v_add_i32_e32 v2, vcc, v1, v0
; SI-NEXT: v_addc_u32_e32 v3, vcc, v6, v3, vcc
; SI-NEXT: v_lshr_b64 v[0:1], v[2:3], 5
; SI-NEXT: v_lshlrev_b32_e32 v0, 27, v2
; SI-NEXT: v_and_b32_e32 v0, 0xf0000000, v0
; SI-NEXT: v_or_b32_e32 v1, v0, v1
; SI-NEXT: v_alignbit_b32 v0, v3, v2, 5
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_rotl_udiv_special_case:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
; VI-NEXT: v_mul_hi_u32 v2, v0, s4
; VI-NEXT: v_mov_b32_e32 v3, 0
; VI-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; VI-NEXT: v_mov_b32_e32 v2, v4
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[2:3]
; VI-NEXT: v_add_u32_e32 v2, vcc, v5, v3
; VI-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
; VI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, s6, v[2:3]
; VI-NEXT: v_lshrrev_b64 v[2:3], 5, v[0:1]
; VI-NEXT: v_lshlrev_b32_e32 v2, 27, v0
; VI-NEXT: v_alignbit_b32 v0, v1, v0, 5
; VI-NEXT: v_and_b32_e32 v1, 0xf0000000, v2
; VI-NEXT: v_or_b32_e32 v1, v1, v3
; VI-NEXT: s_setpc_b64 s[30:31]
%lhs_div = udiv i64 %i, 3
%rhs_div = udiv i64 %i, 48
%lhs_shift = shl i64 %lhs_div, 60
%out = add i64 %lhs_shift, %rhs_div
ret i64 %out
}
254+
255+
; Rotate hidden behind multiplies: (i*9 >> 25) + i*1152. Since 1152 = 9 << 7,
; this is rotl(i*9, 7); the checks show a single v_mul_lo_u32 by 9 followed by
; one v_alignbit_b32 on both subtargets.
define i32 @test_rotl_mul_special_case(i32 %i) {
; SI-LABEL: test_rotl_mul_special_case:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_lo_u32 v0, v0, 9
; SI-NEXT: v_alignbit_b32 v0, v0, v0, 25
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_rotl_mul_special_case:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_mul_lo_u32 v0, v0, 9
; VI-NEXT: v_alignbit_b32 v0, v0, v0, 25
; VI-NEXT: s_setpc_b64 s[30:31]
%lhs_mul = mul i32 %i, 9
%rhs_mul = mul i32 %i, 1152
%lhs_shift = lshr i32 %lhs_mul, 25
%out = add i32 %lhs_shift, %rhs_mul
ret i32 %out
}
275+
276+
; i64 variant with a mask on one side: ((i*1152) & 160) + (i*9 >> 57).
; 1152 = 9 << 7, so both addends derive from i*9; the checks show the mul-by-9
; expansion (lo/hi), an alignbit, and a final 0xff mask with the high result
; word zeroed (the masked add fits in the low byte).
define i64 @test_rotl_mul_with_mask_special_case(i64 %i) {
; SI-LABEL: test_rotl_mul_with_mask_special_case:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_lo_u32 v1, v1, 9
; SI-NEXT: v_mul_hi_u32 v2, v0, 9
; SI-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; SI-NEXT: v_alignbit_b32 v0, v0, v1, 25
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_rotl_mul_with_mask_special_case:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_mul_lo_u32 v1, v1, 9
; VI-NEXT: v_mul_hi_u32 v2, v0, 9
; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1
; VI-NEXT: v_alignbit_b32 v0, v0, v1, 25
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_setpc_b64 s[30:31]
%lhs_mul = mul i64 %i, 1152
%rhs_mul = mul i64 %i, 9
%lhs_and = and i64 %lhs_mul, 160
%rhs_shift = lshr i64 %rhs_mul, 57
%out = add i64 %lhs_and, %rhs_shift
ret i64 %out
}
305+
306+
; fshl with masked low bit: ((x|1) << 5) + ((x >> 27) & 1). The shifted
; operands share only bit 0 of the right-hand side, so this folds to an
; alignbit of (x|1, x) by 27 followed by an 0xffffffe1 mask per the checks.
define i32 @test_fshl_with_mask_special_case(i32 %x) {
; SI-LABEL: test_fshl_with_mask_special_case:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_or_b32_e32 v1, 1, v0
; SI-NEXT: v_alignbit_b32 v0, v1, v0, 27
; SI-NEXT: v_and_b32_e32 v0, 0xffffffe1, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_fshl_with_mask_special_case:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_or_b32_e32 v1, 1, v0
; VI-NEXT: v_alignbit_b32 v0, v1, v0, 27
; VI-NEXT: v_and_b32_e32 v0, 0xffffffe1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%or1 = or i32 %x, 1
%sh1 = shl i32 %or1, 5
%sh2 = lshr i32 %x, 27
%1 = and i32 %sh2, 1
%r = add i32 %sh1, %1
ret i32 %r
}

0 commit comments

Comments
 (0)