Skip to content

Commit 6255bae

Browse files
committed
[X86] Add test coverage based on #78888
1 parent 17cfc15 commit 6255bae

File tree

1 file changed

+142
-0
lines changed

1 file changed

+142
-0
lines changed
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512

; Broadcast the i8 argument to eight lanes, AND each lane with a distinct
; single-bit constant (0x80 in lane 0 down to 0x01 in lane 7), test the result
; for non-zero, and sign-extend the <8 x i1> comparison to <8 x i16>.
; Lane i is therefore all-ones when bit (7 - i) of the argument is set.
; NOTE(review): despite the "v16i8" suffix, the IR below works on <8 x i8> and
; returns <8 x i16>; the name is kept because the label assertions depend on it.
define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
; SSE2-LABEL: pow2_mask_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: pow2_mask_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX2-LABEL: pow2_mask_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: pow2_mask_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb %edi, %xmm0
; AVX512-NEXT: vptestmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %vec = insertelement <1 x i8> poison, i8 %0, i64 0
  %splat = shufflevector <1 x i8> %vec, <1 x i8> poison, <8 x i32> zeroinitializer
  %mask = and <8 x i8> %splat, <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>
  %nonzero = icmp ne <8 x i8> %mask, zeroinitializer
  %ext = sext <8 x i1> %nonzero to <8 x i16>
  ret <8 x i16> %ext
}
; Broadcast the i16 argument to sixteen lanes, AND each lane with a distinct
; single-bit constant (0x8000 in lane 0 down to 0x0001 in lane 15), test the
; result for non-zero, and sign-extend the <16 x i1> comparison to <16 x i16>.
; Lane i is therefore all-ones when bit (15 - i) of the argument is set.
define <16 x i16> @pow2_mask_v16i16(i16 zeroext %0) {
; SSE-LABEL: pow2_mask_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,64,32,16,8,4,2,1]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [32768,16384,8192,4096,2048,1024,512,256]
; SSE-NEXT: pand %xmm3, %xmm0
; SSE-NEXT: pcmpeqw %xmm3, %xmm0
; SSE-NEXT: pcmpeqw %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX2-LABEL: pow2_mask_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: pow2_mask_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastw %edi, %ymm0
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1]
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %vec = insertelement <1 x i16> poison, i16 %0, i64 0
  %splat = shufflevector <1 x i16> %vec, <1 x i16> poison, <16 x i32> zeroinitializer
  %mask = and <16 x i16> %splat, <i16 -32768, i16 16384, i16 8192, i16 4096, i16 2048, i16 1024, i16 512, i16 256, i16 128, i16 64, i16 32, i16 16, i16 8, i16 4, i16 2, i16 1>
  %nonzero = icmp ne <16 x i16> %mask, zeroinitializer
  %ext = sext <16 x i1> %nonzero to <16 x i16>
  ret <16 x i16> %ext
}

; PR78888
; Broadcast the i8 argument to eight lanes, AND each lane with a distinct
; single-bit constant (0x80 in lane 0 down to 0x01 in lane 7), test the result
; for non-zero, sign-extend the <8 x i1> comparison to <8 x i8>, and return the
; eight bytes packed into one i64 via bitcast.
; Lane i is therefore 0xFF when bit (7 - i) of the argument is set, else 0x00.
define i64 @pow2_mask_v8i8(i8 zeroext %0) {
; SSE-LABEL: pow2_mask_v8i8:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
; AVX2-LABEL: pow2_mask_v8i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: retq
;
; AVX512-LABEL: pow2_mask_v8i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb %edi, %xmm0
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
  %vec = insertelement <1 x i8> poison, i8 %0, i64 0
  %splat = shufflevector <1 x i8> %vec, <1 x i8> poison, <8 x i32> zeroinitializer
  %mask = and <8 x i8> %splat, <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>
  %nonzero = icmp ne <8 x i8> %mask, zeroinitializer
  %ext = sext <8 x i1> %nonzero to <8 x i8>
  %res = bitcast <8 x i8> %ext to i64
  ret i64 %res
}

0 commit comments

Comments
 (0)