Skip to content

Commit fccc3ab

Browse files
committed
[X86][SSE] Added shuffle based integer zero extension tests.
llvm-svn: 230145
1 parent 08f36fd commit fccc3ab

File tree

1 file changed

+158
-0
lines changed

1 file changed

+158
-0
lines changed

llvm/test/CodeGen/X86/vector-zext.ll

Lines changed: 158 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -328,3 +328,161 @@ entry:
328328
%Y = zext <4 x i32> %X to <4 x i64>
329329
ret <4 x i64>%Y
330330
}
331+
332+
; Shuffle-based zero extension of <8 x i16> to <8 x i32>.
;
; Rather than using a zext instruction, the IR interleaves every element of %A
; with shuffle index 8, which selects element 0 of the zeroinitializer operand.
; The resulting <16 x i16> (source, 0, source, 0, ...) is then bitcast to
; <8 x i32>. The check lines below pin the exact instruction sequence expected
; for each check prefix, so any codegen change for this pattern shows up here.
define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  ; Even result lanes take %A[0..7]; odd lanes take index 8 (the zero operand).
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
  ; Reinterpret each (i16 value, i16 zero) pair as a single i32.
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
383+
384+
; Shuffle-based zero extension of <4 x i32> to <4 x i64>.
;
; Each element of %A is interleaved with shuffle index 4, which selects
; element 0 of the zeroinitializer operand. The resulting <8 x i32>
; (source, 0, source, 0, ...) is then bitcast to <4 x i64>. The check lines
; below pin the exact instruction sequence expected for each check prefix.
define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: # kill
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: # kill
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
entry:
  ; Even result lanes take %A[0..3]; odd lanes take index 4 (the zero operand).
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
  ; Reinterpret each (i32 value, i32 zero) pair as a single i64.
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}
435+
436+
; Shuffle-based zero extension of <8 x i8> to <8 x i32>.
;
; Each element of %A is followed by three copies of shuffle index 8, which
; selects element 0 of the zeroinitializer operand. The resulting <32 x i8>
; (source, 0, 0, 0, source, 0, 0, 0, ...) is then bitcast to <8 x i32>.
; Unlike the two wider shuffle tests, this function carries no attributes.
; The check lines below pin the exact instruction sequence expected for each
; check prefix.
define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; SSE2-LABEL: shuf_zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pand .LCPI9_0(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT: pandn %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: retq
entry:
  ; Every fourth result lane takes %A[0..7]; the other lanes take index 8
  ; (the zero operand).
  %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
  ; Reinterpret each (i8 value, 0, 0, 0) group as a single i32.
  %Z = bitcast <32 x i8> %B to <8 x i32>
  ret <8 x i32> %Z
}

0 commit comments

Comments
 (0)