Skip to content

Commit 6d366f7

Browse files
committed
[X86] Add tests for folding (icmp ult (add X,-C),2) -> (or (icmp eq X,C), (icmp eq X,C+1)); NFC
1 parent f51ade2 commit 6d366f7

File tree

1 file changed

+356
-0
lines changed

1 file changed

+356
-0
lines changed
Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
4+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
5+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
6+
7+
; External function taking a <4 x i32> and returning void. It is called by
; @eq_or_eq_ult_2_fail_multiuse to give that function's add result a second use.
declare void @use.v4.i32(<4 x i32>)
8+
9+
; Splat form of the range check: (%x + -5) u< 2, sign-extended to i32 lanes.
; The compare is true exactly when a lane of %x is 5 or 6.
define <4 x i32> @eq_or_eq_ult_2(<4 x i32> %x) {
10+
; AVX512-LABEL: eq_or_eq_ult_2:
11+
; AVX512: # %bb.0:
12+
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
13+
; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14+
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15+
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16+
; AVX512-NEXT: retq
17+
;
18+
; AVX2-LABEL: eq_or_eq_ult_2:
19+
; AVX2: # %bb.0:
20+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967291,4294967291,4294967291,4294967291]
21+
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
22+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
23+
; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
24+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
25+
; AVX2-NEXT: retq
26+
;
27+
; SSE41-LABEL: eq_or_eq_ult_2:
28+
; SSE41: # %bb.0:
29+
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1]
31+
; SSE41-NEXT: pminud %xmm0, %xmm1
32+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
33+
; SSE41-NEXT: retq
34+
;
35+
; SSE2-LABEL: eq_or_eq_ult_2:
36+
; SSE2: # %bb.0:
37+
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
38+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
39+
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650,2147483650,2147483650]
40+
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
41+
; SSE2-NEXT: movdqa %xmm1, %xmm0
42+
; SSE2-NEXT: retq
43+
%x_adj = add <4 x i32> %x, <i32 -5, i32 -5, i32 -5, i32 -5>
44+
%cmp = icmp ult <4 x i32> %x_adj, <i32 2, i32 2, i32 2, i32 2>
45+
%r = sext <4 x i1> %cmp to <4 x i32>
46+
ret <4 x i32> %r
47+
}
48+
49+
; (%x + -1) u< 2, sign-extended: true exactly when a lane of %x is 1 or 2.
; In the recorded output every configuration builds the -1 addend in a register
; with pcmpeqd (all-ones) rather than loading it from the constant pool.
; NOTE(review): the name suggests only the sse2 configuration is expected to
; change once the fold is implemented - confirm against the follow-up patch.
define <4 x i32> @eq_or_eq_ult_2_only_transform_sse2(<4 x i32> %x) {
50+
; AVX512-LABEL: eq_or_eq_ult_2_only_transform_sse2:
51+
; AVX512: # %bb.0:
52+
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
53+
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
54+
; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
55+
; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
56+
; AVX512-NEXT: retq
57+
;
58+
; AVX2-LABEL: eq_or_eq_ult_2_only_transform_sse2:
59+
; AVX2: # %bb.0:
60+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
61+
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
62+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
63+
; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
64+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
65+
; AVX2-NEXT: retq
66+
;
67+
; SSE41-LABEL: eq_or_eq_ult_2_only_transform_sse2:
68+
; SSE41: # %bb.0:
69+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
70+
; SSE41-NEXT: paddd %xmm1, %xmm0
71+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1]
72+
; SSE41-NEXT: pminud %xmm0, %xmm1
73+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
74+
; SSE41-NEXT: retq
75+
;
76+
; SSE2-LABEL: eq_or_eq_ult_2_only_transform_sse2:
77+
; SSE2: # %bb.0:
78+
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
79+
; SSE2-NEXT: paddd %xmm0, %xmm1
80+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
81+
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483650,2147483650,2147483650,2147483650]
82+
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
83+
; SSE2-NEXT: retq
84+
%x_adj = add <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
85+
%cmp = icmp ult <4 x i32> %x_adj, <i32 2, i32 2, i32 2, i32 2>
86+
%r = sext <4 x i1> %cmp to <4 x i32>
87+
ret <4 x i32> %r
88+
}
89+
90+
; Same add/compare as @eq_or_eq_ult_2_only_transform_sse2, but %x_adj is also
; passed to @use.v4.i32, so the add has a second use besides the compare. The
; recorded output spills the add result across the call and reloads it.
; NOTE(review): "fail" in the name presumably marks a case where the fold is
; not expected because of the extra use - confirm against the combine.
define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) {
91+
; AVX512-LABEL: eq_or_eq_ult_2_fail_multiuse:
92+
; AVX512: # %bb.0:
93+
; AVX512-NEXT: subq $24, %rsp
94+
; AVX512-NEXT: .cfi_def_cfa_offset 32
95+
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
96+
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
97+
; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
98+
; AVX512-NEXT: callq use.v4.i32@PLT
99+
; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
100+
; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
101+
; AVX512-NEXT: vmovdqa32 {{.*#+}} xmm0 {%k1} {z} = [4294967295,4294967295,4294967295,4294967295]
102+
; AVX512-NEXT: addq $24, %rsp
103+
; AVX512-NEXT: .cfi_def_cfa_offset 8
104+
; AVX512-NEXT: retq
105+
;
106+
; AVX2-LABEL: eq_or_eq_ult_2_fail_multiuse:
107+
; AVX2: # %bb.0:
108+
; AVX2-NEXT: subq $24, %rsp
109+
; AVX2-NEXT: .cfi_def_cfa_offset 32
110+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
111+
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
112+
; AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
113+
; AVX2-NEXT: callq use.v4.i32@PLT
114+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1]
115+
; AVX2-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
116+
; AVX2-NEXT: vpminud %xmm0, %xmm1, %xmm0
117+
; AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
118+
; AVX2-NEXT: addq $24, %rsp
119+
; AVX2-NEXT: .cfi_def_cfa_offset 8
120+
; AVX2-NEXT: retq
121+
;
122+
; SSE41-LABEL: eq_or_eq_ult_2_fail_multiuse:
123+
; SSE41: # %bb.0:
124+
; SSE41-NEXT: subq $24, %rsp
125+
; SSE41-NEXT: .cfi_def_cfa_offset 32
126+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
127+
; SSE41-NEXT: paddd %xmm1, %xmm0
128+
; SSE41-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
129+
; SSE41-NEXT: callq use.v4.i32@PLT
130+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [1,1,1,1]
131+
; SSE41-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
132+
; SSE41-NEXT: pminud %xmm1, %xmm0
133+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
134+
; SSE41-NEXT: addq $24, %rsp
135+
; SSE41-NEXT: .cfi_def_cfa_offset 8
136+
; SSE41-NEXT: retq
137+
;
138+
; SSE2-LABEL: eq_or_eq_ult_2_fail_multiuse:
139+
; SSE2: # %bb.0:
140+
; SSE2-NEXT: subq $24, %rsp
141+
; SSE2-NEXT: .cfi_def_cfa_offset 32
142+
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
143+
; SSE2-NEXT: paddd %xmm0, %xmm1
144+
; SSE2-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
145+
; SSE2-NEXT: movdqa %xmm1, %xmm0
146+
; SSE2-NEXT: callq use.v4.i32@PLT
147+
; SSE2-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
148+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
149+
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483650,2147483650,2147483650,2147483650]
150+
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
151+
; SSE2-NEXT: addq $24, %rsp
152+
; SSE2-NEXT: .cfi_def_cfa_offset 8
153+
; SSE2-NEXT: retq
154+
%x_adj = add <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
155+
call void @use.v4.i32(<4 x i32> %x_adj)
156+
%cmp = icmp ult <4 x i32> %x_adj, <i32 2, i32 2, i32 2, i32 2>
157+
%r = sext <4 x i1> %cmp to <4 x i32>
158+
ret <4 x i32> %r
159+
}
160+
161+
; (%x + -1) u< 3, sign-extended: true when a lane of %x is 1, 2 or 3. That is
; three accepted values, one more than the two-value range of the `ult 2` tests.
; NOTE(review): presumably a negative test, per the "_fail" suffix - confirm.
define <4 x i32> @eq_or_eq_ult_3_fail(<4 x i32> %x) {
162+
; AVX512-LABEL: eq_or_eq_ult_3_fail:
163+
; AVX512: # %bb.0:
164+
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
165+
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
166+
; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
167+
; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
168+
; AVX512-NEXT: retq
169+
;
170+
; AVX2-LABEL: eq_or_eq_ult_3_fail:
171+
; AVX2: # %bb.0:
172+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
173+
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
174+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
175+
; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
176+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
177+
; AVX2-NEXT: retq
178+
;
179+
; SSE41-LABEL: eq_or_eq_ult_3_fail:
180+
; SSE41: # %bb.0:
181+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
182+
; SSE41-NEXT: paddd %xmm1, %xmm0
183+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [2,2,2,2]
184+
; SSE41-NEXT: pminud %xmm0, %xmm1
185+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
186+
; SSE41-NEXT: retq
187+
;
188+
; SSE2-LABEL: eq_or_eq_ult_3_fail:
189+
; SSE2: # %bb.0:
190+
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
191+
; SSE2-NEXT: paddd %xmm0, %xmm1
192+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
193+
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651,2147483651,2147483651]
194+
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
195+
; SSE2-NEXT: retq
196+
%x_adj = add <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
197+
%cmp = icmp ult <4 x i32> %x_adj, <i32 3, i32 3, i32 3, i32 3>
198+
%r = sext <4 x i1> %cmp to <4 x i32>
199+
ret <4 x i32> %r
200+
}
201+
202+
; Non-splat addend <-11, -14, -11, -11> with an unsigned-greater-than compare
; against splat -3. The compare is true only when the adjusted lane is -2 or -1
; (0xFFFFFFFE or 0xFFFFFFFF), so each lane again accepts exactly two values.
define <4 x i32> @eq_or_eq_ugt_m3(<4 x i32> %x) {
203+
; AVX512-LABEL: eq_or_eq_ugt_m3:
204+
; AVX512: # %bb.0:
205+
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
206+
; AVX512-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
207+
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
208+
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
209+
; AVX512-NEXT: retq
210+
;
211+
; AVX2-LABEL: eq_or_eq_ugt_m3:
212+
; AVX2: # %bb.0:
213+
; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
214+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
215+
; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
216+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
217+
; AVX2-NEXT: retq
218+
;
219+
; SSE41-LABEL: eq_or_eq_ugt_m3:
220+
; SSE41: # %bb.0:
221+
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
222+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
223+
; SSE41-NEXT: pmaxud %xmm0, %xmm1
224+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
225+
; SSE41-NEXT: retq
226+
;
227+
; SSE2-LABEL: eq_or_eq_ugt_m3:
228+
; SSE2: # %bb.0:
229+
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
230+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
231+
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
232+
; SSE2-NEXT: retq
233+
%x_adj = add <4 x i32> %x, <i32 -11, i32 -14, i32 -11, i32 -11>
234+
%cmp = icmp ugt <4 x i32> %x_adj, <i32 -3, i32 -3, i32 -3, i32 -3>
235+
%r = sext <4 x i1> %cmp to <4 x i32>
236+
ret <4 x i32> %r
237+
}
238+
239+
; Non-splat addend <1, 2, 3, 4> with an unsigned-less-or-equal compare against
; splat 1. The compare is true when the adjusted lane is 0 or 1, the same
; two-value range that `ult 2` accepts.
define <4 x i32> @eq_or_eq_ule_1(<4 x i32> %x) {
240+
; AVX512-LABEL: eq_or_eq_ule_1:
241+
; AVX512: # %bb.0:
242+
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
243+
; AVX512-NEXT: vpcmpleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
244+
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
245+
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
246+
; AVX512-NEXT: retq
247+
;
248+
; AVX2-LABEL: eq_or_eq_ule_1:
249+
; AVX2: # %bb.0:
250+
; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
251+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
252+
; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
253+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
254+
; AVX2-NEXT: retq
255+
;
256+
; SSE41-LABEL: eq_or_eq_ule_1:
257+
; SSE41: # %bb.0:
258+
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
259+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [1,1,1,1]
260+
; SSE41-NEXT: pminud %xmm0, %xmm1
261+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
262+
; SSE41-NEXT: retq
263+
;
264+
; SSE2-LABEL: eq_or_eq_ule_1:
265+
; SSE2: # %bb.0:
266+
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
268+
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
269+
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
270+
; SSE2-NEXT: pxor %xmm1, %xmm0
271+
; SSE2-NEXT: retq
272+
%x_adj = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
273+
%cmp = icmp ule <4 x i32> %x_adj, <i32 1, i32 1, i32 1, i32 1>
274+
%r = sext <4 x i1> %cmp to <4 x i32>
275+
ret <4 x i32> %r
276+
}
277+
278+
; Non-splat addend <1, 2, 3, 4> with an unsigned-greater-or-equal compare
; against splat -2. The compare is true when the adjusted lane is -2 or -1
; (the two largest unsigned i32 values), i.e. a two-value range.
define <4 x i32> @eq_or_eq_uge_m2(<4 x i32> %x) {
279+
; AVX512-LABEL: eq_or_eq_uge_m2:
280+
; AVX512: # %bb.0:
281+
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
282+
; AVX512-NEXT: vpcmpnltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
283+
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
284+
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
285+
; AVX512-NEXT: retq
286+
;
287+
; AVX2-LABEL: eq_or_eq_uge_m2:
288+
; AVX2: # %bb.0:
289+
; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
290+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
291+
; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
292+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
293+
; AVX2-NEXT: retq
294+
;
295+
; SSE41-LABEL: eq_or_eq_uge_m2:
296+
; SSE41: # %bb.0:
297+
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
298+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
299+
; SSE41-NEXT: pmaxud %xmm0, %xmm1
300+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
301+
; SSE41-NEXT: retq
302+
;
303+
; SSE2-LABEL: eq_or_eq_uge_m2:
304+
; SSE2: # %bb.0:
305+
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
306+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
307+
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483646,2147483646,2147483646,2147483646]
308+
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
309+
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
310+
; SSE2-NEXT: pxor %xmm1, %xmm0
311+
; SSE2-NEXT: retq
312+
%x_adj = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
313+
%cmp = icmp uge <4 x i32> %x_adj, <i32 -2, i32 -2, i32 -2, i32 -2>
314+
%r = sext <4 x i1> %cmp to <4 x i32>
315+
ret <4 x i32> %r
316+
}
317+
318+
; Non-splat addend <1, 2, 3, 4> with an unsigned-greater-or-equal compare
; against splat 2. The compare is true for every adjusted lane value except 0
; and 1, so the accepted set is not a two-value range.
; NOTE(review): presumably a negative test, per "fail" in the name - confirm.
define <4 x i32> @eq_or_eq_uge_2_fail_(<4 x i32> %x) {
319+
; AVX512-LABEL: eq_or_eq_uge_2_fail_:
320+
; AVX512: # %bb.0:
321+
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
322+
; AVX512-NEXT: vpcmpnltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
323+
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
324+
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
325+
; AVX512-NEXT: retq
326+
;
327+
; AVX2-LABEL: eq_or_eq_uge_2_fail_:
328+
; AVX2: # %bb.0:
329+
; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
330+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
331+
; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
332+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
333+
; AVX2-NEXT: retq
334+
;
335+
; SSE41-LABEL: eq_or_eq_uge_2_fail_:
336+
; SSE41: # %bb.0:
337+
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
338+
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm1 = [2,2,2,2]
339+
; SSE41-NEXT: pmaxud %xmm0, %xmm1
340+
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
341+
; SSE41-NEXT: retq
342+
;
343+
; SSE2-LABEL: eq_or_eq_uge_2_fail_:
344+
; SSE2: # %bb.0:
345+
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
346+
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
347+
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650,2147483650,2147483650]
348+
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
349+
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
350+
; SSE2-NEXT: pxor %xmm1, %xmm0
351+
; SSE2-NEXT: retq
352+
%x_adj = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
353+
%cmp = icmp uge <4 x i32> %x_adj, <i32 2, i32 2, i32 2, i32 2>
354+
%r = sext <4 x i1> %cmp to <4 x i32>
355+
ret <4 x i32> %r
356+
}

0 commit comments

Comments
 (0)