Skip to content

Commit 3f42cc1

Browse files
committed
[X86] pr53419.ll - add missing v2i8/v4i8/v8i8 coverage to reduction tests
1 parent d0a7bb9 commit 3f42cc1

File tree

1 file changed

+266
-9
lines changed

1 file changed

+266
-9
lines changed

llvm/test/CodeGen/X86/pr53419.ll

Lines changed: 266 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,77 @@
66
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64,AVX512
77
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86
88

9+
declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
910
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
1011
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
1112

1213
; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
1314

15+
; Returns true when every byte lane of the two <2 x i8> values loaded from
; %arg1 and %arg compares equal. The lanes are compared with a vector icmp eq
; and the resulting <2 x i1> mask is folded with llvm.vector.reduce.and.
; The prefixed comment lines that follow are FileCheck expectations for the
; assembly llc produces under each configuration; they are matched by the test
; harness and must be kept verbatim.
define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
16+
; SSE2-LABEL: intrinsic_v2i8:
17+
; SSE2: # %bb.0: # %bb
18+
; SSE2-NEXT: movzwl (%rsi), %eax
19+
; SSE2-NEXT: movd %eax, %xmm0
20+
; SSE2-NEXT: movzwl (%rdi), %eax
21+
; SSE2-NEXT: movd %eax, %xmm1
22+
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
23+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
24+
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
25+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
26+
; SSE2-NEXT: movmskpd %xmm0, %eax
27+
; SSE2-NEXT: cmpb $3, %al
28+
; SSE2-NEXT: sete %al
29+
; SSE2-NEXT: retq
30+
;
31+
; SSE42-LABEL: intrinsic_v2i8:
32+
; SSE42: # %bb.0: # %bb
33+
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
34+
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
35+
; SSE42-NEXT: psubq %xmm1, %xmm0
36+
; SSE42-NEXT: ptest %xmm0, %xmm0
37+
; SSE42-NEXT: sete %al
38+
; SSE42-NEXT: retq
39+
;
40+
; AVX-LABEL: intrinsic_v2i8:
41+
; AVX: # %bb.0: # %bb
42+
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
43+
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
44+
; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0
45+
; AVX-NEXT: vptest %xmm0, %xmm0
46+
; AVX-NEXT: sete %al
47+
; AVX-NEXT: retq
48+
;
49+
; AVX512-LABEL: intrinsic_v2i8:
50+
; AVX512: # %bb.0: # %bb
51+
; AVX512-NEXT: movzwl (%rsi), %eax
52+
; AVX512-NEXT: vmovd %eax, %xmm0
53+
; AVX512-NEXT: movzwl (%rdi), %eax
54+
; AVX512-NEXT: vmovd %eax, %xmm1
55+
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
56+
; AVX512-NEXT: knotw %k0, %k0
57+
; AVX512-NEXT: kmovd %k0, %eax
58+
; AVX512-NEXT: testb $3, %al
59+
; AVX512-NEXT: sete %al
60+
; AVX512-NEXT: retq
61+
;
62+
; X86-LABEL: intrinsic_v2i8:
63+
; X86: # %bb.0: # %bb
64+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
65+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
66+
; X86-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
67+
; X86-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
68+
; X86-NEXT: vpsubq %xmm1, %xmm0, %xmm0
69+
; X86-NEXT: vptest %xmm0, %xmm0
70+
; X86-NEXT: sete %al
71+
; X86-NEXT: retl
72+
; IR under test. Both loads are marked align 1, so the pointers are only
; assumed to be byte-aligned. The block label is echoed in the "# %bb"
; expectations above, so it has to stay in sync with them.
bb:
73+
%lhs = load <2 x i8>, ptr %arg1, align 1
74+
%rhs = load <2 x i8>, ptr %arg, align 1
75+
%cmp = icmp eq <2 x i8> %lhs, %rhs
76+
%all_eq = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
77+
ret i1 %all_eq
78+
}
79+
1480
define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
1581
; SSE2-LABEL: intrinsic_v4i8:
1682
; SSE2: # %bb.0: # %bb
@@ -120,8 +186,75 @@ bb:
120186
ret i1 %all_eq
121187
}
122188

123-
define i1 @vector_version(ptr align 1 %arg, ptr align 1 %arg1) {
124-
; SSE2-LABEL: vector_version:
189+
; Returns true when no byte lane differs between the two <2 x i8> values loaded
; from %arg1 and %arg. The lanes are compared with a vector icmp ne, the
; <2 x i1> mask is bitcast to an i2 scalar, and that scalar is tested against 0.
; The prefixed comment lines that follow are FileCheck expectations for the
; assembly llc produces under each configuration; they are matched by the test
; harness and must be kept verbatim.
define i1 @vector_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
190+
; SSE2-LABEL: vector_version_v2i8:
191+
; SSE2: # %bb.0: # %bb
192+
; SSE2-NEXT: movzwl (%rsi), %eax
193+
; SSE2-NEXT: movd %eax, %xmm0
194+
; SSE2-NEXT: movzwl (%rdi), %eax
195+
; SSE2-NEXT: movd %eax, %xmm1
196+
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
197+
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
198+
; SSE2-NEXT: pxor %xmm1, %xmm0
199+
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
200+
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
201+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
202+
; SSE2-NEXT: movmskpd %xmm0, %eax
203+
; SSE2-NEXT: testl %eax, %eax
204+
; SSE2-NEXT: sete %al
205+
; SSE2-NEXT: retq
206+
;
207+
; SSE42-LABEL: vector_version_v2i8:
208+
; SSE42: # %bb.0: # %bb
209+
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
210+
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
211+
; SSE42-NEXT: psubq %xmm1, %xmm0
212+
; SSE42-NEXT: ptest %xmm0, %xmm0
213+
; SSE42-NEXT: sete %al
214+
; SSE42-NEXT: retq
215+
;
216+
; AVX-LABEL: vector_version_v2i8:
217+
; AVX: # %bb.0: # %bb
218+
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
219+
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
220+
; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0
221+
; AVX-NEXT: vptest %xmm0, %xmm0
222+
; AVX-NEXT: sete %al
223+
; AVX-NEXT: retq
224+
;
225+
; AVX512-LABEL: vector_version_v2i8:
226+
; AVX512: # %bb.0: # %bb
227+
; AVX512-NEXT: movzwl (%rsi), %eax
228+
; AVX512-NEXT: vmovd %eax, %xmm0
229+
; AVX512-NEXT: movzwl (%rdi), %eax
230+
; AVX512-NEXT: vmovd %eax, %xmm1
231+
; AVX512-NEXT: vpcmpneqb %xmm1, %xmm0, %k0
232+
; AVX512-NEXT: kmovd %k0, %eax
233+
; AVX512-NEXT: testb $3, %al
234+
; AVX512-NEXT: sete %al
235+
; AVX512-NEXT: retq
236+
;
237+
; X86-LABEL: vector_version_v2i8:
238+
; X86: # %bb.0: # %bb
239+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
240+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
241+
; X86-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
242+
; X86-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
243+
; X86-NEXT: vpsubq %xmm1, %xmm0, %xmm0
244+
; X86-NEXT: vptest %xmm0, %xmm0
245+
; X86-NEXT: sete %al
246+
; X86-NEXT: retl
247+
; IR under test. Both loads are marked align 1, so the pointers are only
; assumed to be byte-aligned. The block label is echoed in the "# %bb"
; expectations above, so it has to stay in sync with them.
bb:
248+
%lhs = load <2 x i8>, ptr %arg1, align 1
249+
%rhs = load <2 x i8>, ptr %arg, align 1
250+
%any_ne = icmp ne <2 x i8> %lhs, %rhs
251+
%any_ne_scalar = bitcast <2 x i1> %any_ne to i2
252+
%all_eq = icmp eq i2 %any_ne_scalar, 0
253+
ret i1 %all_eq
254+
}
255+
256+
define i1 @vector_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
257+
; SSE2-LABEL: vector_version_v4i8:
125258
; SSE2: # %bb.0: # %bb
126259
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
127260
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
@@ -135,7 +268,7 @@ define i1 @vector_version(ptr align 1 %arg, ptr align 1 %arg1) {
135268
; SSE2-NEXT: sete %al
136269
; SSE2-NEXT: retq
137270
;
138-
; SSE42-LABEL: vector_version:
271+
; SSE42-LABEL: vector_version_v4i8:
139272
; SSE42: # %bb.0: # %bb
140273
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
141274
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
@@ -144,7 +277,7 @@ define i1 @vector_version(ptr align 1 %arg, ptr align 1 %arg1) {
144277
; SSE42-NEXT: sete %al
145278
; SSE42-NEXT: retq
146279
;
147-
; AVX-LABEL: vector_version:
280+
; AVX-LABEL: vector_version_v4i8:
148281
; AVX: # %bb.0: # %bb
149282
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
150283
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
@@ -153,7 +286,7 @@ define i1 @vector_version(ptr align 1 %arg, ptr align 1 %arg1) {
153286
; AVX-NEXT: sete %al
154287
; AVX-NEXT: retq
155288
;
156-
; AVX512-LABEL: vector_version:
289+
; AVX512-LABEL: vector_version_v4i8:
157290
; AVX512: # %bb.0: # %bb
158291
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
159292
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
@@ -163,7 +296,7 @@ define i1 @vector_version(ptr align 1 %arg, ptr align 1 %arg1) {
163296
; AVX512-NEXT: sete %al
164297
; AVX512-NEXT: retq
165298
;
166-
; X86-LABEL: vector_version:
299+
; X86-LABEL: vector_version_v4i8:
167300
; X86: # %bb.0: # %bb
168301
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
169302
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -182,6 +315,81 @@ bb:
182315
ret i1 %all_eq
183316
}
184317

318+
; Returns true when no byte lane differs between the two <8 x i8> values loaded
; from %arg1 and %arg. The lanes are compared with a vector icmp ne, the
; <8 x i1> mask is bitcast to an i8 scalar, and that scalar is tested against 0.
; The prefixed comment lines that follow are FileCheck expectations for the
; assembly llc produces under each configuration; they are matched by the test
; harness and must be kept verbatim.
define i1 @vector_version_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
319+
; SSE-LABEL: vector_version_v8i8:
320+
; SSE: # %bb.0: # %bb
321+
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
322+
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
323+
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
324+
; SSE-NEXT: pmovmskb %xmm1, %eax
325+
; SSE-NEXT: xorb $-1, %al
326+
; SSE-NEXT: sete %al
327+
; SSE-NEXT: retq
328+
;
329+
; AVX-LABEL: vector_version_v8i8:
330+
; AVX: # %bb.0: # %bb
331+
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
332+
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
333+
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
334+
; AVX-NEXT: vpmovmskb %xmm0, %eax
335+
; AVX-NEXT: xorb $-1, %al
336+
; AVX-NEXT: sete %al
337+
; AVX-NEXT: retq
338+
;
339+
; AVX512-LABEL: vector_version_v8i8:
340+
; AVX512: # %bb.0: # %bb
341+
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
342+
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
343+
; AVX512-NEXT: vpcmpneqb %xmm1, %xmm0, %k0
344+
; AVX512-NEXT: kortestb %k0, %k0
345+
; AVX512-NEXT: sete %al
346+
; AVX512-NEXT: retq
347+
;
348+
; X86-LABEL: vector_version_v8i8:
349+
; X86: # %bb.0: # %bb
350+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
351+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
352+
; X86-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
353+
; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
354+
; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
355+
; X86-NEXT: vpmovmskb %xmm0, %eax
356+
; X86-NEXT: xorb $-1, %al
357+
; X86-NEXT: sete %al
358+
; X86-NEXT: retl
359+
; IR under test. Both loads are marked align 1, so the pointers are only
; assumed to be byte-aligned. The block label is echoed in the "# %bb"
; expectations above, so it has to stay in sync with them.
bb:
360+
%lhs = load <8 x i8>, ptr %arg1, align 1
361+
%rhs = load <8 x i8>, ptr %arg, align 1
362+
%any_ne = icmp ne <8 x i8> %lhs, %rhs
363+
%any_ne_scalar = bitcast <8 x i1> %any_ne to i8
364+
%all_eq = icmp eq i8 %any_ne_scalar, 0
365+
ret i1 %all_eq
366+
}
367+
368+
; Returns true when the two <2 x i8> values loaded from %arg1 and %arg are
; bit-identical. The data is loaded as vectors, each vector is reinterpreted
; as an i16 with bitcast, and the two scalars are compared with one icmp eq.
; The expectations below show this form lowering to a single 16-bit compare
; (cmpw) for both the 64-bit and the 32-bit target.
define i1 @mixed_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
369+
; X64-LABEL: mixed_version_v2i8:
370+
; X64: # %bb.0: # %bb
371+
; X64-NEXT: movzwl (%rsi), %eax
372+
; X64-NEXT: cmpw (%rdi), %ax
373+
; X64-NEXT: sete %al
374+
; X64-NEXT: retq
375+
;
376+
; X86-LABEL: mixed_version_v2i8:
377+
; X86: # %bb.0: # %bb
378+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
379+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
380+
; X86-NEXT: movzwl (%ecx), %ecx
381+
; X86-NEXT: cmpw (%eax), %cx
382+
; X86-NEXT: sete %al
383+
; X86-NEXT: retl
384+
; IR under test. Both loads are marked align 1, so the pointers are only
; assumed to be byte-aligned. The block label is echoed in the "# %bb"
; expectations above, so it has to stay in sync with them.
bb:
385+
%lhs = load <2 x i8>, ptr %arg1, align 1
386+
%rhs = load <2 x i8>, ptr %arg, align 1
387+
%lhs_s = bitcast <2 x i8> %lhs to i16
388+
%rhs_s = bitcast <2 x i8> %rhs to i16
389+
%all_eq = icmp eq i16 %lhs_s, %rhs_s
390+
ret i1 %all_eq
391+
}
392+
185393
define i1 @mixed_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
186394
; X64-LABEL: mixed_version_v4i8:
187395
; X64: # %bb.0: # %bb
@@ -235,15 +443,38 @@ bb:
235443
ret i1 %all_eq
236444
}
237445

238-
define i1 @scalar_version(ptr align 1 %arg, ptr align 1 %arg1) {
239-
; X64-LABEL: scalar_version:
446+
; Scalar form of the 2-byte equality test: loads an i16 from %arg1 and from
; %arg and returns the result of icmp eq on the two values. The FIXME near the
; top of this file names the scalar version as the assembly the vector forms
; should also produce; the expectations below record that assembly (a 16-bit
; load followed by cmpw against memory).
define i1 @scalar_version_i16(ptr align 1 %arg, ptr align 1 %arg1) {
447+
; X64-LABEL: scalar_version_i16:
448+
; X64: # %bb.0: # %bb
449+
; X64-NEXT: movzwl (%rsi), %eax
450+
; X64-NEXT: cmpw (%rdi), %ax
451+
; X64-NEXT: sete %al
452+
; X64-NEXT: retq
453+
;
454+
; X86-LABEL: scalar_version_i16:
455+
; X86: # %bb.0: # %bb
456+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
457+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
458+
; X86-NEXT: movzwl (%ecx), %ecx
459+
; X86-NEXT: cmpw (%eax), %cx
460+
; X86-NEXT: sete %al
461+
; X86-NEXT: retl
462+
; IR under test. Both loads are marked align 1, so the pointers are only
; assumed to be byte-aligned. The block label is echoed in the "# %bb"
; expectations above, so it has to stay in sync with them.
bb:
463+
%lhs = load i16, ptr %arg1, align 1
464+
%rhs = load i16, ptr %arg, align 1
465+
%all_eq = icmp eq i16 %lhs, %rhs
466+
ret i1 %all_eq
467+
}
468+
469+
define i1 @scalar_version_i32(ptr align 1 %arg, ptr align 1 %arg1) {
470+
; X64-LABEL: scalar_version_i32:
240471
; X64: # %bb.0: # %bb
241472
; X64-NEXT: movl (%rsi), %eax
242473
; X64-NEXT: cmpl (%rdi), %eax
243474
; X64-NEXT: sete %al
244475
; X64-NEXT: retq
245476
;
246-
; X86-LABEL: scalar_version:
477+
; X86-LABEL: scalar_version_i32:
247478
; X86: # %bb.0: # %bb
248479
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
249480
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -257,3 +488,29 @@ bb:
257488
%all_eq = icmp eq i32 %lhs, %rhs
258489
ret i1 %all_eq
259490
}
491+
492+
; Scalar form of the 8-byte equality test: loads an i64 from %arg1 and from
; %arg and returns the result of icmp eq on the two values. On the 64-bit
; target the expectations show a single cmpq against memory. On the 32-bit
; target they show the value handled as two 32-bit halves (offsets 0 and 4)
; that are xor-ed with the other operand and then or-ed together before sete.
define i1 @scalar_version_i64(ptr align 1 %arg, ptr align 1 %arg1) {
493+
; X64-LABEL: scalar_version_i64:
494+
; X64: # %bb.0: # %bb
495+
; X64-NEXT: movq (%rsi), %rax
496+
; X64-NEXT: cmpq (%rdi), %rax
497+
; X64-NEXT: sete %al
498+
; X64-NEXT: retq
499+
;
500+
; X86-LABEL: scalar_version_i64:
501+
; X86: # %bb.0: # %bb
502+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
503+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
504+
; X86-NEXT: movl (%ecx), %edx
505+
; X86-NEXT: movl 4(%ecx), %ecx
506+
; X86-NEXT: xorl 4(%eax), %ecx
507+
; X86-NEXT: xorl (%eax), %edx
508+
; X86-NEXT: orl %ecx, %edx
509+
; X86-NEXT: sete %al
510+
; X86-NEXT: retl
511+
; IR under test. Both loads are marked align 1, so the pointers are only
; assumed to be byte-aligned. The block label is echoed in the "# %bb"
; expectations above, so it has to stay in sync with them.
bb:
512+
%lhs = load i64, ptr %arg1, align 1
513+
%rhs = load i64, ptr %arg, align 1
514+
%all_eq = icmp eq i64 %lhs, %rhs
515+
ret i1 %all_eq
516+
}

0 commit comments

Comments
 (0)