@@ -977,3 +977,65 @@ define <16 x i32> @blend_of_permutes_v16i32(<8 x i64> %a0, <8x i64> %a1) {
977
977
%r = shufflevector <16 x i32 > %x0 , <16 x i32 > %x1 , <16 x i32 > <i32 0 , i32 17 , i32 2 , i32 19 , i32 20 , i32 5 , i32 6 , i32 23 , i32 8 , i32 25 , i32 10 , i32 27 , i32 28 , i32 13 , i32 14 , i32 31 >
978
978
ret <16 x i32 > %r
979
979
}
980
+
981
; Concatenation of four 128-bit variable permutes into one 512-bit vector.
; The <8 x i64> selector %m is cut into four <2 x i64> pieces (elements 0-1,
; 2-3, 4-5 and 6-7). Piece N drives @llvm.x86.avx.vpermilvar.pd on %aN, and the
; four <2 x double> results are joined in the order a0, a1, a2, a3.
; The recorded assembly performs four xmm vpermilpd operations and rebuilds the
; zmm result with vinsertf128 / vinsertf64x4.
; NOTE(review): presumably a baseline for merging these into one wide
; vpermilpd - confirm against the change that introduced this test.
define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3, <8 x i64> %m) nounwind {
; X86-LABEL: concat_vpermilvar_v8f64_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-64, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    vmovapd 8(%ebp), %xmm3
; X86-NEXT:    vpermilpd 72(%ebp), %xmm0, %xmm0
; X86-NEXT:    vpermilpd 88(%ebp), %xmm1, %xmm1
; X86-NEXT:    vpermilpd 104(%ebp), %xmm2, %xmm2
; X86-NEXT:    vpermilpd 120(%ebp), %xmm3, %xmm3
; X86-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm4, %xmm5
; X64-NEXT:    vextractf32x4 $2, %zmm4, %xmm6
; X64-NEXT:    vextractf32x4 $3, %zmm4, %xmm7
; X64-NEXT:    vpermilpd %xmm4, %xmm0, %xmm0
; X64-NEXT:    vpermilpd %xmm5, %xmm1, %xmm1
; X64-NEXT:    vpermilpd %xmm6, %xmm2, %xmm2
; X64-NEXT:    vpermilpd %xmm7, %xmm3, %xmm3
; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X64-NEXT:    retq
  ; Slice the 512-bit selector into one 128-bit selector per source vector.
  %sel01 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
  %sel23 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
  %sel45 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
  %sel67 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
  ; One variable permute per source, each using its own selector slice.
  %perm0 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %sel01)
  %perm1 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a1, <2 x i64> %sel23)
  %perm2 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a2, <2 x i64> %sel45)
  %perm3 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a3, <2 x i64> %sel67)
  ; Identity-index shuffles: pairwise concatenation up to 256 and then 512 bits.
  %lower = shufflevector <2 x double> %perm0, <2 x double> %perm1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %upper = shufflevector <2 x double> %perm2, <2 x double> %perm3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %joined = shufflevector <4 x double> %lower, <4 x double> %upper, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %joined
}
1026
+
1027
; Concatenation of two 256-bit variable permutes into one 512-bit vector.
; The <8 x i64> selector %m is split into its low half (elements 0-3) and high
; half (elements 4-7). The low half drives @llvm.x86.avx.vpermilvar.pd.256 on
; %a0, the high half drives it on %a1, and the two <4 x double> results are
; joined with %a0's result first.
; The recorded assembly performs two ymm vpermilpd operations and rebuilds the
; zmm result with vinsertf64x4; a single set of assertions covers both run
; configurations, with only the return mnemonic matched by a pattern.
define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1, <8 x i64> %m) nounwind {
; CHECK-LABEL: concat_vpermilvar_v8f64_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf64x4 $1, %zmm2, %ymm3
; CHECK-NEXT:    vpermilpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpermilpd %ymm3, %ymm1, %ymm1
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  ; Low and high 256-bit halves of the selector.
  %sel.lo = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sel.hi = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; One 256-bit variable permute per source vector.
  %perm.lo = tail call noundef <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %sel.lo)
  %perm.hi = tail call noundef <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a1, <4 x i64> %sel.hi)
  ; Identity-index shuffle: plain concatenation of the two halves.
  %joined = shufflevector <4 x double> %perm.lo, <4 x double> %perm.hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %joined
}
0 commit comments