Skip to content

Commit 20cdffb

Browse files
committed
[X86] combineConcatVectorOps - extend VPERMILPD handling to support 512-bit types
1 parent 24e88b0 commit 20cdffb

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -58113,11 +58113,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5811358113
DAG.getNode(X86ISD::VPERMILPI, DL, FloatVT, Res, Op0.getOperand(1));
5811458114
return DAG.getBitcast(VT, Res);
5811558115
}
58116-
// TODO: v8f64 VPERMILPI concatenation.
58117-
if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
58118-
uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
58119-
uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
58120-
uint64_t Idx = ((Idx1 & 3) << 2) | (Idx0 & 3);
58116+
if (!IsSplat && (VT == MVT::v4f64 || VT == MVT::v8f64)) {
58117+
unsigned NumSubElts = Op0.getValueType().getVectorNumElements();
58118+
uint64_t Mask = (1ULL << NumSubElts) - 1;
58119+
uint64_t Idx = 0;
58120+
for (unsigned I = 0; I != NumOps; ++I) {
58121+
uint64_t SubIdx = Ops[I].getConstantOperandVal(1);
58122+
Idx |= (SubIdx & Mask) << (I * NumSubElts);
58123+
}
5812158124
return DAG.getNode(X86ISD::VPERMILPI, DL, VT,
5812258125
ConcatSubOperand(VT, Ops, 0),
5812358126
DAG.getTargetConstant(Idx, DL, MVT::i8));

llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1678,11 +1678,10 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
16781678
; AVX512F: # %bb.0:
16791679
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
16801680
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1681+
; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
16811682
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1682-
; AVX512F-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1683-
; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
1684-
; AVX512F-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
1685-
; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1683+
; AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
1684+
; AVX512F-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
16861685
; AVX512F-NEXT: retq
16871686
;
16881687
; AVX512F-32-LABEL: concat_shuffle_v8f64_v2f64_10325476:
@@ -1691,10 +1690,9 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
16911690
; AVX512F-32-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
16921691
; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
16931692
; AVX512F-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1694-
; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
16951693
; AVX512F-32-NEXT: vinsertf128 $1, {{[0-9]+}}(%esp), %ymm2, %ymm1
1696-
; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
16971694
; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1695+
; AVX512F-32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
16981696
; AVX512F-32-NEXT: addl $12, %esp
16991697
; AVX512F-32-NEXT: retl
17001698
%s0 = shufflevector <2 x double> %a0, <2 x double> poison, <2 x i32> <i32 1, i32 0>

0 commit comments

Comments
 (0)