
Commit 20be96b

[X86] Pre-commit more unit tests for D134477
1 parent 14b9505 commit 20be96b

File tree

1 file changed (+278, −4 lines)

llvm/test/CodeGen/X86/vector-interleave.ll

Lines changed: 278 additions & 4 deletions
@@ -138,8 +138,282 @@ define <64 x i16> @interleave8x8(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x
   ret <64 x i16> %result
 }
 
-define <64 x i8> @interleave2x32(<32 x i8> %a, <32 x i8> %b) {
-; SSE-LABEL: interleave2x32:
+define <8 x double> @interleave2x4f64(<4 x double> %a, <4 x double> %b) {
+; SSE-LABEL: interleave2x4f64:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm0, %xmm4
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm2[1]
+; SSE-NEXT: movaps %xmm1, %xmm2
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE-NEXT: movaps %xmm4, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: interleave2x4f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
+; AVX1-NEXT: vshufpd {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[3],ymm2[3]
+; AVX1-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm0[1],xmm1[1]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vmovapd %ymm2, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: interleave2x4f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,0,2,1]
+; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[0,1,1,3]
+; AVX2-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,3]
+; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3]
+; AVX2-NEXT: vshufpd {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; AVX2-NEXT: vmovapd %ymm2, %ymm0
+; AVX2-NEXT: retq
+  %result = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x double> %result
+}
+
+define <8 x i64> @interleave2x4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE-LABEL: interleave2x4i64:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm1, %xmm4
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
+; SSE-NEXT: movaps %xmm4, %xmm2
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm3[1]
+; SSE-NEXT: movaps %xmm4, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: interleave2x4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3,2,3]
+; AVX1-NEXT: vshufpd {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[3],ymm2[3]
+; AVX1-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm0[1],xmm1[1]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vmovapd %ymm2, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: interleave2x4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,0,2,1]
+; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[0,1,1,3]
+; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
+; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX2-NEXT: vmovaps %ymm2, %ymm0
+; AVX2-NEXT: retq
+  %result = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x i64> %result
+}
+
+define <16 x float> @interleave2x8f32(<8 x float> %a, <8 x float> %b) {
+; SSE-LABEL: interleave2x8f32:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm1, %xmm4
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE-NEXT: movaps %xmm4, %xmm2
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: unpckhps {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE-NEXT: movaps %xmm4, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: interleave2x8f32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vunpckhps {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1
+; AVX1-NEXT: vmovaps %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: interleave2x8f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vunpckhps {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1
+; AVX2-NEXT: vmovaps %ymm2, %ymm0
+; AVX2-NEXT: retq
+  %result = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x float> %result
+}
+
+define <16 x i32> @interleave2x8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE-LABEL: interleave2x8i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm1, %xmm4
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE-NEXT: movaps %xmm4, %xmm2
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: unpckhps {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE-NEXT: movaps %xmm4, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: interleave2x8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vunpckhps {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1
+; AVX1-NEXT: vmovaps %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: interleave2x8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vunpckhps {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1
+; AVX2-NEXT: vmovaps %ymm2, %ymm0
+; AVX2-NEXT: retq
+  %result = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x i32> %result
+}
+
+define <32 x i16> @interleave2x16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: interleave2x16i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm1, %xmm4
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE-NEXT: movdqa %xmm4, %xmm2
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT: movdqa %xmm4, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: interleave2x16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1
+; AVX1-NEXT: vmovaps %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: interleave2x16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm1
+; AVX2-NEXT: vmovdqa %ymm2, %ymm0
+; AVX2-NEXT: retq
+  %result = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x i16> %result
+}
+
+define <64 x i16> @interleave2x32i16(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: interleave2x32i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: movdqa %xmm0, %xmm8
+; SSE-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm4[0],xmm8[1],xmm4[1],xmm8[2],xmm4[2],xmm8[3],xmm4[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE-NEXT: movdqa %xmm1, %xmm4
+; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
+; SSE-NEXT: movdqa %xmm2, %xmm5
+; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
+; SSE-NEXT: movdqa %xmm3, %xmm6
+; SSE-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; SSE-NEXT: movdqa %xmm3, 112(%rdi)
+; SSE-NEXT: movdqa %xmm6, 96(%rdi)
+; SSE-NEXT: movdqa %xmm2, 80(%rdi)
+; SSE-NEXT: movdqa %xmm5, 64(%rdi)
+; SSE-NEXT: movdqa %xmm1, 48(%rdi)
+; SSE-NEXT: movdqa %xmm4, 32(%rdi)
+; SSE-NEXT: movdqa %xmm0, 16(%rdi)
+; SSE-NEXT: movdqa %xmm8, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: interleave2x32i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
+; AVX1-NEXT: vmovaps %ymm4, %ymm0
+; AVX1-NEXT: vmovaps %ymm5, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: interleave2x32i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm2
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm0
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vmovdqa %ymm4, %ymm0
+; AVX2-NEXT: vmovdqa %ymm5, %ymm1
+; AVX2-NEXT: retq
+  %result = shufflevector <32 x i16> %a, <32 x i16> %b, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <64 x i16> %result
+}
+
+define <64 x i8> @interleave2x32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: interleave2x32i8:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm1, %xmm4
 ; SSE-NEXT: movdqa %xmm0, %xmm1
@@ -151,7 +425,7 @@ define <64 x i8> @interleave2x32(<32 x i8> %a, <32 x i8> %b) {
 ; SSE-NEXT: movdqa %xmm4, %xmm3
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: interleave2x32:
+; AVX1-LABEL: interleave2x32i8:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
@@ -164,7 +438,7 @@ define <64 x i8> @interleave2x32(<32 x i8> %a, <32 x i8> %b) {
 ; AVX1-NEXT: vmovaps %ymm2, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: interleave2x32:
+; AVX2-LABEL: interleave2x32i8:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
 ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
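
Aside (not part of the commit): every test added here exercises the same pattern, a single shufflevector whose mask alternates lanes of the two sources, so result element 2*i is taken from lane i of %a and element 2*i+1 from lane i of %b (mask indices at or above the source vector length select from the second operand). A minimal sketch of the pattern, using a hypothetical i32 variant that does not appear in this patch:

define <8 x i32> @interleave2x4i32(<4 x i32> %a, <4 x i32> %b) {
  ; Mask indices 0-3 select from %a and 4-7 from %b; alternating them
  ; produces the interleave a0,b0,a1,b1,a2,b2,a3,b3.
  %result = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i32> %result
}

The {{.*#+}} capture syntax in the CHECK lines is characteristic of assertions generated by llvm/utils/update_llc_test_checks.py, the usual tool for producing and refreshing such assembly expectations.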

0 commit comments
