Skip to content

Commit 9e9fe5e

Browse files
committed
[X86] combineConcatVectorOps - concat mixed v2f64 shuffles into v4f64 vshufpd
We can always concatenate vXf64 per-lane shuffles into a single vshufpd instruction, assuming we can profitably concatenate at least one of its operands. I was really hoping to get this into combineX86ShufflesRecursively, but it still can't handle concatenation as well as combineConcatVectorOps yet.
1 parent 44a047c commit 9e9fe5e

File tree

2 files changed

+215
-229
lines changed

2 files changed

+215
-229
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58493,14 +58493,23 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5849358493
const APInt &SrcIdx0 = Src0.getConstantOperandAPInt(1);
5849458494
const APInt &SrcIdx1 = Src1.getConstantOperandAPInt(1);
5849558495
// concat(extract_subvector(v0), extract_subvector(v1)) -> vperm2x128.
58496-
// Only concat of subvector high halves which vperm2x128 is best at.
58496+
// Only concat of subvector high halves which vperm2x128 is best at or if
58497+
// it should fold into a subvector broadcast.
5849758498
if (VT.is256BitVector() && SrcVT0.is256BitVector() &&
58498-
SrcVT1.is256BitVector() && SrcIdx0 == (NumSrcElts0 / 2) &&
58499-
SrcIdx1 == (NumSrcElts1 / 2)) {
58500-
return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
58501-
DAG.getBitcast(VT, Src0.getOperand(0)),
58502-
DAG.getBitcast(VT, Src1.getOperand(0)),
58503-
DAG.getTargetConstant(0x31, DL, MVT::i8));
58499+
SrcVT1.is256BitVector()) {
58500+
assert((SrcIdx0 == 0 || SrcIdx0 == (NumSrcElts0 / 2)) &&
58501+
(SrcIdx1 == 0 || SrcIdx1 == (NumSrcElts1 / 2)) &&
58502+
"Bad subvector index");
58503+
if ((SrcIdx0 == (NumSrcElts0 / 2) && SrcIdx1 == (NumSrcElts1 / 2)) ||
58504+
(IsSplat && ISD::isNormalLoad(Src0.getOperand(0).getNode()))) {
58505+
unsigned Index = 0;
58506+
Index |= SrcIdx0 == 0 ? 0x00 : 0x01;
58507+
Index |= SrcIdx1 == 0 ? 0x20 : 0x30;
58508+
return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
58509+
DAG.getBitcast(VT, Src0.getOperand(0)),
58510+
DAG.getBitcast(VT, Src1.getOperand(0)),
58511+
DAG.getTargetConstant(Index, DL, MVT::i8));
58512+
}
5850458513
}
5850558514
// Widen extract_subvector
5850658515
// concat(extract_subvector(x,lo), extract_subvector(x,hi))
@@ -59313,6 +59322,45 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5931359322
return DAG.getBitcast(VT, Res);
5931459323
}
5931559324

59325+
// We can always convert per-lane vXf64 shuffles into VSHUFPD.
59326+
if (!IsSplat && NumOps == 2 && VT == MVT::v4f64 &&
59327+
all_of(Ops, [](SDValue Op) {
59328+
return Op.hasOneUse() && (Op.getOpcode() == X86ISD::MOVDDUP ||
59329+
Op.getOpcode() == X86ISD::SHUFP ||
59330+
Op.getOpcode() == X86ISD::VPERMILPI ||
59331+
Op.getOpcode() == X86ISD::BLENDI ||
59332+
Op.getOpcode() == X86ISD::UNPCKL ||
59333+
Op.getOpcode() == X86ISD::UNPCKH);
59334+
})) {
59335+
SmallVector<SDValue, 2> SrcOps0, SrcOps1;
59336+
SmallVector<int, 8> SrcMask0, SrcMask1;
59337+
if (getTargetShuffleMask(Ops[0], /*AllowSentinelZero=*/false, SrcOps0,
59338+
SrcMask0) &&
59339+
getTargetShuffleMask(Ops[1], /*AllowSentinelZero=*/false, SrcOps1,
59340+
SrcMask1)) {
59341+
assert(SrcMask0.size() == 2 && SrcMask1.size() == 2 && "Bad shuffles");
59342+
SDValue LHS[] = {SrcOps0[SrcMask0[0] / 2], SrcOps1[SrcMask1[0] / 2]};
59343+
SDValue RHS[] = {SrcOps0[SrcMask0[1] / 2], SrcOps1[SrcMask1[1] / 2]};
59344+
SDValue Concat0 =
59345+
combineConcatVectorOps(DL, VT, LHS, DAG, Subtarget, Depth + 1);
59346+
SDValue Concat1 =
59347+
combineConcatVectorOps(DL, VT, RHS, DAG, Subtarget, Depth + 1);
59348+
if (Concat0 || Concat1) {
59349+
unsigned SHUFPDMask = 0;
59350+
SHUFPDMask |= (SrcMask0[0] & 1) << 0;
59351+
SHUFPDMask |= (SrcMask0[1] & 1) << 1;
59352+
SHUFPDMask |= (SrcMask1[0] & 1) << 2;
59353+
SHUFPDMask |= (SrcMask1[1] & 1) << 3;
59354+
Concat0 =
59355+
Concat0 ? Concat0 : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS);
59356+
Concat1 =
59357+
Concat1 ? Concat1 : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, RHS);
59358+
return DAG.getNode(X86ISD::SHUFP, DL, VT, Concat0, Concat1,
59359+
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
59360+
}
59361+
}
59362+
}
59363+
5931659364
return SDValue();
5931759365
}
5931859366

0 commit comments

Comments
 (0)