Skip to content

Commit 8c41e3f

Browse files
committed
[X86] Add test case for Issue #78897
1 parent 2cff46f commit 8c41e3f

File tree

1 file changed

+313
-0
lines changed

1 file changed

+313
-0
lines changed

llvm/test/CodeGen/X86/pr78897.ll

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X86-SSE2
3+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64-SSE2
4+
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X86-SSE42
5+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64-SSE42
6+
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X86-AVX2
7+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64-AVX2
8+
; RUN: llc < %s -mtriple=i686-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X86-AVX512
9+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64-AVX512
10+
11+
; FIXME: PR78897 - Don't vectorize a mul if we still need the extract
12+
define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
13+
; NOTE(review): the stray "<n>+" lines interleaved below are diff line-number
; artifacts from the web page this test was scraped from; they are not part of
; the original .ll file. All CHECK lines were autogenerated by
; utils/update_llc_test_checks.py - regenerate with that script, never edit by
; hand.
; X86-SSE2-LABEL: produceShuffleVectorForByte:
14+
; X86-SSE2: # %bb.0: # %entry
15+
; X86-SSE2-NEXT: pushl %ebx
16+
; X86-SSE2-NEXT: pushl %edi
17+
; X86-SSE2-NEXT: pushl %esi
18+
; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
19+
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
20+
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
21+
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
22+
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
23+
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
24+
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
25+
; X86-SSE2-NEXT: pand %xmm0, %xmm1
26+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
27+
; X86-SSE2-NEXT: movd %xmm2, %esi
28+
; X86-SSE2-NEXT: movd %xmm1, %ecx
29+
; X86-SSE2-NEXT: movl $286331152, %edi # imm = 0x11111110
30+
; X86-SSE2-NEXT: movl %ecx, %eax
31+
; X86-SSE2-NEXT: mull %edi
32+
; X86-SSE2-NEXT: imull $286331153, %ecx, %ebx # imm = 0x11111111
33+
; X86-SSE2-NEXT: addl %edx, %ebx
34+
; X86-SSE2-NEXT: imull $286331152, %esi, %edx # imm = 0x11111110
35+
; X86-SSE2-NEXT: addl %ebx, %edx
36+
; X86-SSE2-NEXT: movd %edx, %xmm2
37+
; X86-SSE2-NEXT: movd %eax, %xmm1
38+
; X86-SSE2-NEXT: xorl $286331153, %ecx # imm = 0x11111111
39+
; X86-SSE2-NEXT: movl %ecx, %eax
40+
; X86-SSE2-NEXT: mull %edi
41+
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
42+
; X86-SSE2-NEXT: xorl $17895697, %esi # imm = 0x1111111
43+
; X86-SSE2-NEXT: imull $286331153, %ecx, %ecx # imm = 0x11111111
44+
; X86-SSE2-NEXT: addl %edx, %ecx
45+
; X86-SSE2-NEXT: imull $286331152, %esi, %edx # imm = 0x11111110
46+
; X86-SSE2-NEXT: addl %ecx, %edx
47+
; X86-SSE2-NEXT: movd %edx, %xmm2
48+
; X86-SSE2-NEXT: movd %eax, %xmm3
49+
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
50+
; X86-SSE2-NEXT: pand %xmm0, %xmm1
51+
; X86-SSE2-NEXT: pandn %xmm3, %xmm0
52+
; X86-SSE2-NEXT: por %xmm1, %xmm0
53+
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
54+
; X86-SSE2-NEXT: psrlw $4, %xmm1
55+
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
56+
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
57+
; X86-SSE2-NEXT: popl %esi
58+
; X86-SSE2-NEXT: popl %edi
59+
; X86-SSE2-NEXT: popl %ebx
60+
; X86-SSE2-NEXT: retl
61+
;
62+
; X64-SSE2-LABEL: produceShuffleVectorForByte:
63+
; X64-SSE2: # %bb.0: # %entry
64+
; X64-SSE2-NEXT: movd %edi, %xmm0
65+
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
66+
; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
67+
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
68+
; X64-SSE2-NEXT: pxor %xmm0, %xmm0
69+
; X64-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
70+
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
71+
; X64-SSE2-NEXT: pand %xmm0, %xmm1
72+
; X64-SSE2-NEXT: movq %xmm1, %rax
73+
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
74+
; X64-SSE2-NEXT: psrlq $32, %xmm2
75+
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1229782938247303440,1229782938247303440]
76+
; X64-SSE2-NEXT: pmuludq %xmm3, %xmm2
77+
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [286331153,286331153]
78+
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm4
79+
; X64-SSE2-NEXT: paddq %xmm2, %xmm4
80+
; X64-SSE2-NEXT: psllq $32, %xmm4
81+
; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1
82+
; X64-SSE2-NEXT: paddq %xmm4, %xmm1
83+
; X64-SSE2-NEXT: movabsq $76861433640456465, %rcx # imm = 0x111111111111111
84+
; X64-SSE2-NEXT: xorq %rax, %rcx
85+
; X64-SSE2-NEXT: movabsq $1229782938247303440, %rax # imm = 0x1111111111111110
86+
; X64-SSE2-NEXT: imulq %rcx, %rax
87+
; X64-SSE2-NEXT: movq %rax, %xmm2
88+
; X64-SSE2-NEXT: pand %xmm0, %xmm1
89+
; X64-SSE2-NEXT: pandn %xmm2, %xmm0
90+
; X64-SSE2-NEXT: por %xmm1, %xmm0
91+
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
92+
; X64-SSE2-NEXT: psrlw $4, %xmm1
93+
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
94+
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
95+
; X64-SSE2-NEXT: retq
96+
;
97+
; X86-SSE42-LABEL: produceShuffleVectorForByte:
98+
; X86-SSE42: # %bb.0: # %entry
99+
; X86-SSE42-NEXT: pushl %ebx
100+
; X86-SSE42-NEXT: pushl %edi
101+
; X86-SSE42-NEXT: pushl %esi
102+
; X86-SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
103+
; X86-SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
104+
; X86-SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
105+
; X86-SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
106+
; X86-SSE42-NEXT: pxor %xmm0, %xmm0
107+
; X86-SSE42-NEXT: pcmpeqb %xmm1, %xmm0
108+
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
109+
; X86-SSE42-NEXT: pand %xmm0, %xmm1
110+
; X86-SSE42-NEXT: movd %xmm1, %ecx
111+
; X86-SSE42-NEXT: movl $286331152, %edi # imm = 0x11111110
112+
; X86-SSE42-NEXT: movl %ecx, %eax
113+
; X86-SSE42-NEXT: mull %edi
114+
; X86-SSE42-NEXT: pextrd $1, %xmm1, %esi
115+
; X86-SSE42-NEXT: imull $286331153, %ecx, %ebx # imm = 0x11111111
116+
; X86-SSE42-NEXT: addl %edx, %ebx
117+
; X86-SSE42-NEXT: imull $286331152, %esi, %edx # imm = 0x11111110
118+
; X86-SSE42-NEXT: addl %ebx, %edx
119+
; X86-SSE42-NEXT: movd %eax, %xmm2
120+
; X86-SSE42-NEXT: pinsrd $1, %edx, %xmm2
121+
; X86-SSE42-NEXT: xorl $286331153, %ecx # imm = 0x11111111
122+
; X86-SSE42-NEXT: movl %ecx, %eax
123+
; X86-SSE42-NEXT: mull %edi
124+
; X86-SSE42-NEXT: xorl $17895697, %esi # imm = 0x1111111
125+
; X86-SSE42-NEXT: imull $286331153, %ecx, %ecx # imm = 0x11111111
126+
; X86-SSE42-NEXT: addl %edx, %ecx
127+
; X86-SSE42-NEXT: imull $286331152, %esi, %edx # imm = 0x11111110
128+
; X86-SSE42-NEXT: addl %ecx, %edx
129+
; X86-SSE42-NEXT: movd %eax, %xmm1
130+
; X86-SSE42-NEXT: pinsrd $1, %edx, %xmm1
131+
; X86-SSE42-NEXT: pblendvb %xmm0, %xmm2, %xmm1
132+
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
133+
; X86-SSE42-NEXT: psrlw $4, %xmm0
134+
; X86-SSE42-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
135+
; X86-SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
136+
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
137+
; X86-SSE42-NEXT: popl %esi
138+
; X86-SSE42-NEXT: popl %edi
139+
; X86-SSE42-NEXT: popl %ebx
140+
; X86-SSE42-NEXT: retl
141+
;
142+
; X64-SSE42-LABEL: produceShuffleVectorForByte:
143+
; X64-SSE42: # %bb.0: # %entry
144+
; X64-SSE42-NEXT: movd %edi, %xmm0
145+
; X64-SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
146+
; X64-SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
147+
; X64-SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
148+
; X64-SSE42-NEXT: pxor %xmm0, %xmm0
149+
; X64-SSE42-NEXT: pcmpeqb %xmm1, %xmm0
150+
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
151+
; X64-SSE42-NEXT: pand %xmm0, %xmm2
152+
; X64-SSE42-NEXT: movq %xmm2, %rax
153+
; X64-SSE42-NEXT: movdqa %xmm2, %xmm1
154+
; X64-SSE42-NEXT: psrlq $32, %xmm1
155+
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [1229782938247303440,1229782938247303440]
156+
; X64-SSE42-NEXT: pmuludq %xmm3, %xmm1
157+
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm4 = [286331153,286331153]
158+
; X64-SSE42-NEXT: pmuludq %xmm2, %xmm4
159+
; X64-SSE42-NEXT: paddq %xmm1, %xmm4
160+
; X64-SSE42-NEXT: psllq $32, %xmm4
161+
; X64-SSE42-NEXT: pmuludq %xmm3, %xmm2
162+
; X64-SSE42-NEXT: paddq %xmm4, %xmm2
163+
; X64-SSE42-NEXT: movabsq $76861433640456465, %rcx # imm = 0x111111111111111
164+
; X64-SSE42-NEXT: xorq %rax, %rcx
165+
; X64-SSE42-NEXT: movabsq $1229782938247303440, %rax # imm = 0x1111111111111110
166+
; X64-SSE42-NEXT: imulq %rcx, %rax
167+
; X64-SSE42-NEXT: movq %rax, %xmm1
168+
; X64-SSE42-NEXT: pblendvb %xmm0, %xmm2, %xmm1
169+
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
170+
; X64-SSE42-NEXT: psrlw $4, %xmm0
171+
; X64-SSE42-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
172+
; X64-SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
173+
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
174+
; X64-SSE42-NEXT: retq
175+
;
176+
; X86-AVX2-LABEL: produceShuffleVectorForByte:
177+
; X86-AVX2: # %bb.0: # %entry
178+
; X86-AVX2-NEXT: pushl %ebx
179+
; X86-AVX2-NEXT: pushl %edi
180+
; X86-AVX2-NEXT: pushl %esi
181+
; X86-AVX2-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0
182+
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
183+
; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
184+
; X86-AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
185+
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
186+
; X86-AVX2-NEXT: vmovd %xmm1, %edx
187+
; X86-AVX2-NEXT: movl $286331152, %ecx # imm = 0x11111110
188+
; X86-AVX2-NEXT: mulxl %ecx, %edi, %esi
189+
; X86-AVX2-NEXT: vpextrd $1, %xmm1, %eax
190+
; X86-AVX2-NEXT: imull $286331153, %edx, %ebx # imm = 0x11111111
191+
; X86-AVX2-NEXT: addl %esi, %ebx
192+
; X86-AVX2-NEXT: imull $286331152, %eax, %esi # imm = 0x11111110
193+
; X86-AVX2-NEXT: addl %ebx, %esi
194+
; X86-AVX2-NEXT: vmovd %edi, %xmm1
195+
; X86-AVX2-NEXT: xorl $286331153, %edx # imm = 0x11111111
196+
; X86-AVX2-NEXT: mulxl %ecx, %edi, %ecx
197+
; X86-AVX2-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
198+
; X86-AVX2-NEXT: xorl $17895697, %eax # imm = 0x1111111
199+
; X86-AVX2-NEXT: imull $286331153, %edx, %edx # imm = 0x11111111
200+
; X86-AVX2-NEXT: addl %ecx, %edx
201+
; X86-AVX2-NEXT: imull $286331152, %eax, %eax # imm = 0x11111110
202+
; X86-AVX2-NEXT: addl %edx, %eax
203+
; X86-AVX2-NEXT: vmovd %edi, %xmm2
204+
; X86-AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
205+
; X86-AVX2-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
206+
; X86-AVX2-NEXT: vpsrlw $4, %xmm0, %xmm1
207+
; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
208+
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
209+
; X86-AVX2-NEXT: popl %esi
210+
; X86-AVX2-NEXT: popl %edi
211+
; X86-AVX2-NEXT: popl %ebx
212+
; X86-AVX2-NEXT: retl
213+
;
214+
; X64-AVX2-LABEL: produceShuffleVectorForByte:
215+
; X64-AVX2: # %bb.0: # %entry
216+
; X64-AVX2-NEXT: vmovd %edi, %xmm0
217+
; X64-AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
218+
; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
219+
; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
220+
; X64-AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
221+
; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
222+
; X64-AVX2-NEXT: vmovq %xmm1, %rax
223+
; X64-AVX2-NEXT: vpsrlq $32, %xmm1, %xmm2
224+
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1229782938247303440,1229782938247303440]
225+
; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm4
226+
; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
227+
; X64-AVX2-NEXT: vpaddq %xmm2, %xmm4, %xmm2
228+
; X64-AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
229+
; X64-AVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
230+
; X64-AVX2-NEXT: vpaddq %xmm2, %xmm1, %xmm1
231+
; X64-AVX2-NEXT: movabsq $76861433640456465, %rcx # imm = 0x111111111111111
232+
; X64-AVX2-NEXT: xorq %rax, %rcx
233+
; X64-AVX2-NEXT: movabsq $1229782938247303440, %rax # imm = 0x1111111111111110
234+
; X64-AVX2-NEXT: imulq %rcx, %rax
235+
; X64-AVX2-NEXT: vmovq %rax, %xmm2
236+
; X64-AVX2-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
237+
; X64-AVX2-NEXT: vpsrlw $4, %xmm0, %xmm1
238+
; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
239+
; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
240+
; X64-AVX2-NEXT: retq
241+
;
242+
; X86-AVX512-LABEL: produceShuffleVectorForByte:
243+
; X86-AVX512: # %bb.0: # %entry
244+
; X86-AVX512-NEXT: pushl %ebx
245+
; X86-AVX512-NEXT: pushl %edi
246+
; X86-AVX512-NEXT: pushl %esi
247+
; X86-AVX512-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0
248+
; X86-AVX512-NEXT: vptestnmb {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %k1
249+
; X86-AVX512-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1} {z}
250+
; X86-AVX512-NEXT: vpextrd $1, %xmm0, %eax
251+
; X86-AVX512-NEXT: vmovd %xmm0, %edx
252+
; X86-AVX512-NEXT: movl $286331152, %ecx # imm = 0x11111110
253+
; X86-AVX512-NEXT: mulxl %ecx, %edi, %esi
254+
; X86-AVX512-NEXT: imull $286331153, %edx, %ebx # imm = 0x11111111
255+
; X86-AVX512-NEXT: addl %esi, %ebx
256+
; X86-AVX512-NEXT: imull $286331152, %eax, %esi # imm = 0x11111110
257+
; X86-AVX512-NEXT: addl %ebx, %esi
258+
; X86-AVX512-NEXT: vmovd %edi, %xmm0
259+
; X86-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
260+
; X86-AVX512-NEXT: xorl $17895697, %eax # imm = 0x1111111
261+
; X86-AVX512-NEXT: xorl $286331153, %edx # imm = 0x11111111
262+
; X86-AVX512-NEXT: mulxl %ecx, %esi, %ecx
263+
; X86-AVX512-NEXT: imull $286331153, %edx, %edx # imm = 0x11111111
264+
; X86-AVX512-NEXT: addl %ecx, %edx
265+
; X86-AVX512-NEXT: imull $286331152, %eax, %eax # imm = 0x11111110
266+
; X86-AVX512-NEXT: addl %edx, %eax
267+
; X86-AVX512-NEXT: vmovd %esi, %xmm1
268+
; X86-AVX512-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
269+
; X86-AVX512-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
270+
; X86-AVX512-NEXT: vpsrlw $4, %xmm1, %xmm0
271+
; X86-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
272+
; X86-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
273+
; X86-AVX512-NEXT: popl %esi
274+
; X86-AVX512-NEXT: popl %edi
275+
; X86-AVX512-NEXT: popl %ebx
276+
; X86-AVX512-NEXT: retl
277+
;
278+
; X64-AVX512-LABEL: produceShuffleVectorForByte:
279+
; X64-AVX512: # %bb.0: # %entry
280+
; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0
281+
; X64-AVX512-NEXT: vptestnmb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
282+
; X64-AVX512-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
283+
; X64-AVX512-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
284+
; X64-AVX512-NEXT: vmovq %xmm0, %rax
285+
; X64-AVX512-NEXT: movabsq $76861433640456465, %rcx # imm = 0x111111111111111
286+
; X64-AVX512-NEXT: xorq %rax, %rcx
287+
; X64-AVX512-NEXT: movabsq $1229782938247303440, %rax # imm = 0x1111111111111110
288+
; X64-AVX512-NEXT: imulq %rcx, %rax
289+
; X64-AVX512-NEXT: vmovq %rax, %xmm0
290+
; X64-AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
291+
; X64-AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1
292+
; X64-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
293+
; X64-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
294+
; X64-AVX512-NEXT: retq
295+
; IR summary: splat byte %0 across <8 x i8>, AND with <1,2,4,...,128> so each
; lane tests one distinct bit of %0, and select 0x11 into lanes whose bit is
; clear (%.not.not is an icmp eq against zero).  The mask is bitcast to i64
; and multiplied by %const = 0x1111111111111110; a second path first xors the
; i64 mask with 0x111111111111111 before the same multiply.  The two products
; are re-selected with %.not.not, then split into low nibbles (and 15) and
; high nibbles (lshr 4) and interleaved into the <16 x i8> return value.
; NOTE(review): regression test for PR78897 (see the FIXME above the define);
; the CHECK lines deliberately capture the current suboptimal codegen where
; the vector multiply is still scalarized around the movq/pextrd extracts.
entry:
296+
%const = bitcast i64 1229782938247303440 to i64
297+
%1 = insertelement <1 x i8> poison, i8 %0, i64 0
298+
%2 = shufflevector <1 x i8> %1, <1 x i8> poison, <8 x i32> zeroinitializer
299+
%3 = and <8 x i8> %2, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
300+
%.not.not = icmp eq <8 x i8> %3, zeroinitializer
301+
%4 = select <8 x i1> %.not.not, <8 x i8> <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>, <8 x i8> zeroinitializer
302+
%5 = bitcast <8 x i8> %4 to i64
303+
%6 = mul i64 %5, %const
304+
%7 = bitcast i64 %6 to <8 x i8>
305+
%8 = xor i64 %5, 76861433640456465
306+
%9 = mul i64 %8, %const
307+
%10 = bitcast i64 %9 to <8 x i8>
308+
%11 = select <8 x i1> %.not.not, <8 x i8> %7, <8 x i8> %10
309+
%12 = and <8 x i8> %11, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
310+
%13 = lshr <8 x i8> %11, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
311+
%14 = shufflevector <8 x i8> %12, <8 x i8> %13, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
312+
ret <16 x i8> %14
313+
}

0 commit comments

Comments
 (0)