Skip to content

Commit fa52a54

Browse files
committed
[X86] Add tests showing failure to concatenate X86ISD::VPERMILPV nodes.
1 parent b62e149 commit fa52a54

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,22 @@ define <8 x i32> @concat_self_v8i32(<4 x i32> %x) {
671671
ret <8 x i32> %a
672672
}
673673

674+
; Concatenation of two 128-bit variable permutes into a 256-bit result.
; The <4 x i64> mask %m is split into its low (elements 0-1) and high
; (elements 2-3) halves; each half drives a separate
; @llvm.x86.avx.vpermilvar.pd call on %a0 / %a1, and the two <2 x double>
; results are joined with an identity shufflevector.
; The CHECK lines record the current codegen: two xmm vpermilpd instructions
; followed by vinsertf128, i.e. the permutes are not yet merged into a single
; ymm vpermilpd.
define <4 x double> @concat_vpermilvar_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1, <4 x i64> %m) {
675+
; CHECK-LABEL: concat_vpermilvar_v4f64_v2f64:
676+
; CHECK: # %bb.0:
677+
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
678+
; CHECK-NEXT: vpermilpd %xmm2, %xmm0, %xmm0
679+
; CHECK-NEXT: vpermilpd %xmm3, %xmm1, %xmm1
680+
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
681+
; CHECK-NEXT: ret{{[l|q]}}
682+
%m0 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
683+
%m1 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
684+
%v0 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %m0)
685+
%v1 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a1, <2 x i64> %m1)
686+
%res = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
687+
ret <4 x double> %res
688+
}
689+
674690
define <16 x i64> @bit_reversal_permutation(<16 x i64> %a0) nounwind {
675691
; X86-AVX1-LABEL: bit_reversal_permutation:
676692
; X86-AVX1: # %bb.0:

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,3 +977,65 @@ define <16 x i32> @blend_of_permutes_v16i32(<8 x i64> %a0, <8x i64> %a1) {
977977
%r = shufflevector <16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31>
978978
ret <16 x i32> %r
979979
}
980+
981+
; Concatenation of four 128-bit variable permutes into a 512-bit result.
; The <8 x i64> mask %m is split into four <2 x i64> quarters (%m0..%m3);
; each quarter drives a separate @llvm.x86.avx.vpermilvar.pd call on
; %a0..%a3. The four <2 x double> results are joined pairwise into %lo / %hi
; and then into the final <8 x double> with identity shufflevectors.
; The CHECK lines record the current codegen for both prefixes: four xmm
; vpermilpd instructions followed by vinsertf128 / vinsertf64x4, i.e. the
; permutes are not yet merged into a single wider vpermilpd.
define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3, <8 x i64> %m) nounwind {
982+
; X86-LABEL: concat_vpermilvar_v8f64_v2f64:
983+
; X86: # %bb.0:
984+
; X86-NEXT: pushl %ebp
985+
; X86-NEXT: movl %esp, %ebp
986+
; X86-NEXT: andl $-64, %esp
987+
; X86-NEXT: subl $64, %esp
988+
; X86-NEXT: vmovapd 8(%ebp), %xmm3
989+
; X86-NEXT: vpermilpd 72(%ebp), %xmm0, %xmm0
990+
; X86-NEXT: vpermilpd 88(%ebp), %xmm1, %xmm1
991+
; X86-NEXT: vpermilpd 104(%ebp), %xmm2, %xmm2
992+
; X86-NEXT: vpermilpd 120(%ebp), %xmm3, %xmm3
993+
; X86-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
994+
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
995+
; X86-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
996+
; X86-NEXT: movl %ebp, %esp
997+
; X86-NEXT: popl %ebp
998+
; X86-NEXT: retl
999+
;
1000+
; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
1001+
; X64: # %bb.0:
1002+
; X64-NEXT: vextractf128 $1, %ymm4, %xmm5
1003+
; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm6
1004+
; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm7
1005+
; X64-NEXT: vpermilpd %xmm4, %xmm0, %xmm0
1006+
; X64-NEXT: vpermilpd %xmm5, %xmm1, %xmm1
1007+
; X64-NEXT: vpermilpd %xmm6, %xmm2, %xmm2
1008+
; X64-NEXT: vpermilpd %xmm7, %xmm3, %xmm3
1009+
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1010+
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1011+
; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
1012+
; X64-NEXT: retq
1013+
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
1014+
%m1 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
1015+
%m2 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
1016+
%m3 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
1017+
%v0 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %m0)
1018+
%v1 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a1, <2 x i64> %m1)
1019+
%v2 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a2, <2 x i64> %m2)
1020+
%v3 = tail call noundef <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a3, <2 x i64> %m3)
1021+
%lo = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1022+
%hi = shufflevector <2 x double> %v2, <2 x double> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1023+
%res = shufflevector <4 x double> %lo, <4 x double> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1024+
ret <8 x double> %res
1025+
}
1026+
1027+
; Concatenation of two 256-bit variable permutes into a 512-bit result.
; The <8 x i64> mask %m is split into its low (elements 0-3) and high
; (elements 4-7) halves; each half drives a separate
; @llvm.x86.avx.vpermilvar.pd.256 call on %a0 / %a1, and the two
; <4 x double> results are joined with an identity shufflevector.
; The CHECK lines record the current codegen: two ymm vpermilpd instructions
; followed by vinsertf64x4, i.e. the permutes are not yet merged into a
; single zmm vpermilpd.
define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1, <8 x i64> %m) nounwind {
1028+
; CHECK-LABEL: concat_vpermilvar_v8f64_v4f64:
1029+
; CHECK: # %bb.0:
1030+
; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
1031+
; CHECK-NEXT: vpermilpd %ymm2, %ymm0, %ymm0
1032+
; CHECK-NEXT: vpermilpd %ymm3, %ymm1, %ymm1
1033+
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1034+
; CHECK-NEXT: ret{{[l|q]}}
1035+
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1036+
%m1 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1037+
%v0 = tail call noundef <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %m0)
1038+
%v1 = tail call noundef <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a1, <4 x i64> %m1)
1039+
%res = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1040+
ret <8 x double> %res
1041+
}

0 commit comments

Comments
 (0)