@@ -846,6 +846,26 @@ define <32 x i8> @concat_pshufb_unnecessary(<16 x i8> %a0, <16 x i8> %a1, <16 x
846
846
ret <32 x i8 > %res
847
847
}
848
848
849
; Demanded-elements test for vpermps: the shufflevector below reads only
; lanes 0-3 of the permute result and takes lanes 4-7 from %a1, so the upper
; four permute indices (0,0,7,7) never reach the returned vector.
define <8 x float> @demandedelts_vpermps(<8 x float> %a0, <8 x float> %a1) {
; AVX2-LABEL: demandedelts_vpermps:
; AVX2: # %bb.0:
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,1,1,0]
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: demandedelts_vpermps:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm2 = [3,1,1,0,20,21,22,23]
; AVX512-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: ret{{[l|q]}}
  %permuted = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 3, i32 1, i32 1, i32 0, i32 0, i32 0, i32 7, i32 7>)
  %blended = shufflevector <8 x float> %permuted, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %blended
}
868
+
849
869
define <8 x i32 > @constant_fold_permd () {
850
870
; AVX2-LABEL: constant_fold_permd:
851
871
; AVX2: # %bb.0:
0 commit comments