Skip to content

Commit cfdf09b

Browse files
committed
[X86][SSE] Add PAVG test case from PR41316
llvm-svn: 357346
1 parent 88335c2 commit cfdf09b

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

llvm/test/CodeGen/X86/avg.ll

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2473,3 +2473,83 @@ define <1 x i8> @avg_v1i8(<1 x i8> %x, <1 x i8> %y) {
24732473
ret <1 x i8> %f
24742474
}
24752475

2476+
; PR41316: average of two <8 x i16> vectors whose lanes have each been shifted
; left by 2. The source-level form is given on the next comment line.
; In the IR body the +1 rounding term does not appear as an add: it shows up as
; an `or` with 1 on the first shifted operand (bit 0 is known clear after the
; shl, so the result is the same value). Presumably that fold was done by the
; middle-end before codegen - TODO confirm against the bug report.
; NOTE(review): none of the check blocks below contain a (v)pavgw instruction;
; they record the widened zext/add/shift/pack sequence that llc emitted when the
; test was added.
; _mm_avg_epu16( _mm_slli_epi16(a, 2), _mm_slli_epi16(b, 2))
2477+
define <2 x i64> @PR41316(<2 x i64>, <2 x i64>) {
2478+
; SSE2-LABEL: PR41316:
2479+
; SSE2: # %bb.0:
2480+
; SSE2-NEXT: psllw $2, %xmm0
2481+
; SSE2-NEXT: psllw $2, %xmm1
2482+
; SSE2-NEXT: pxor %xmm2, %xmm2
2483+
; SSE2-NEXT: movdqa %xmm1, %xmm3
2484+
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2485+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2486+
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
2487+
; SSE2-NEXT: movdqa %xmm0, %xmm4
2488+
; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
2489+
; SSE2-NEXT: paddd %xmm3, %xmm4
2490+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2491+
; SSE2-NEXT: paddd %xmm1, %xmm0
2492+
; SSE2-NEXT: pslld $15, %xmm4
2493+
; SSE2-NEXT: psrad $16, %xmm4
2494+
; SSE2-NEXT: pslld $15, %xmm0
2495+
; SSE2-NEXT: psrad $16, %xmm0
2496+
; SSE2-NEXT: packssdw %xmm4, %xmm0
2497+
; SSE2-NEXT: retq
2498+
;
2499+
; AVX1-LABEL: PR41316:
2500+
; AVX1: # %bb.0:
2501+
; AVX1-NEXT: vpsllw $2, %xmm0, %xmm0
2502+
; AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
2503+
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2504+
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2505+
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2506+
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
2507+
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2508+
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
2509+
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2510+
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
2511+
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm1
2512+
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
2513+
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2514+
; AVX1-NEXT: retq
2515+
;
2516+
; AVX2-LABEL: PR41316:
2517+
; AVX2: # %bb.0:
2518+
; AVX2-NEXT: vpsllw $2, %xmm0, %xmm0
2519+
; AVX2-NEXT: vpsllw $2, %xmm1, %xmm1
2520+
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2521+
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
2522+
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2523+
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
2524+
; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0
2525+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2526+
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2527+
; AVX2-NEXT: vzeroupper
2528+
; AVX2-NEXT: retq
2529+
;
2530+
; AVX512-LABEL: PR41316:
2531+
; AVX512: # %bb.0:
2532+
; AVX512-NEXT: vpsllw $2, %xmm0, %xmm0
2533+
; AVX512-NEXT: vpsllw $2, %xmm1, %xmm1
2534+
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2535+
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
2536+
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2537+
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
2538+
; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0
2539+
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
2540+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2541+
; AVX512-NEXT: vzeroupper
2542+
; AVX512-NEXT: retq
2543+
; Reinterpret the first argument as eight i16 lanes and shift each lane left by 2.
%3 = bitcast <2 x i64> %0 to <8 x i16>
2544+
%4 = shl <8 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
2545+
; Same for the second argument, then widen its lanes to i32.
%5 = bitcast <2 x i64> %1 to <8 x i16>
2546+
%6 = shl <8 x i16> %5, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
2547+
%7 = zext <8 x i16> %6 to <8 x i32>
2548+
; Bit 0 of every lane of %4 is zero after the shl above, so this `or` sets it
; to 1, giving the same value an add of 1 would.
%8 = or <8 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2549+
%9 = zext <8 x i16> %8 to <8 x i32>
2550+
; Both operands are zero-extended i16 values, so the i32 sum cannot wrap in
; either the unsigned or signed sense (consistent with the nuw/nsw flags).
%10 = add nuw nsw <8 x i32> %9, %7
2551+
; Halve the sum and narrow back to i16 lanes; the halved value fits in 16 bits.
%11 = lshr <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2552+
%12 = trunc <8 x i32> %11 to <8 x i16>
2553+
; Hand the result back in the same <2 x i64> type the arguments use.
%13 = bitcast <8 x i16> %12 to <2 x i64>
2554+
ret <2 x i64> %13
2555+
}

0 commit comments

Comments
 (0)