Skip to content

Commit 17eb1d3

Browse files
committed
[X86] combineConcatVectorOps - concat mixed v2f64 shuffles into v4f64 vshufpd
We can always concatenate vXf64 per-lane shuffles into a single vshufpd instruction, assuming we can profitably concatenate at least one of its operands. I was really hoping to get this into combineX86ShufflesRecursively, but it still can't handle concatenation as well as combineConcatVectorOps yet.
1 parent 40933fd commit 17eb1d3

File tree

2 files changed

+215
-229
lines changed

2 files changed

+215
-229
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58492,14 +58492,23 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5849258492
const APInt &SrcIdx0 = Src0.getConstantOperandAPInt(1);
5849358493
const APInt &SrcIdx1 = Src1.getConstantOperandAPInt(1);
5849458494
// concat(extract_subvector(v0), extract_subvector(v1)) -> vperm2x128.
58495-
// Only concat of subvector high halves which vperm2x128 is best at.
58495+
// Only concat of subvector high halves which vperm2x128 is best at or if
58496+
// it should fold into a subvector broadcast.
5849658497
if (VT.is256BitVector() && SrcVT0.is256BitVector() &&
58497-
SrcVT1.is256BitVector() && SrcIdx0 == (NumSrcElts0 / 2) &&
58498-
SrcIdx1 == (NumSrcElts1 / 2)) {
58499-
return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
58500-
DAG.getBitcast(VT, Src0.getOperand(0)),
58501-
DAG.getBitcast(VT, Src1.getOperand(0)),
58502-
DAG.getTargetConstant(0x31, DL, MVT::i8));
58498+
SrcVT1.is256BitVector()) {
58499+
assert((SrcIdx0 == 0 || SrcIdx0 == (NumSrcElts0 / 2)) &&
58500+
(SrcIdx1 == 0 || SrcIdx1 == (NumSrcElts1 / 2)) &&
58501+
"Bad subvector index");
58502+
if ((SrcIdx0 == (NumSrcElts0 / 2) && SrcIdx1 == (NumSrcElts1 / 2)) ||
58503+
(IsSplat && ISD::isNormalLoad(Src0.getOperand(0).getNode()))) {
58504+
unsigned Index = 0;
58505+
Index |= SrcIdx0 == 0 ? 0x00 : 0x01;
58506+
Index |= SrcIdx1 == 0 ? 0x20 : 0x30;
58507+
return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
58508+
DAG.getBitcast(VT, Src0.getOperand(0)),
58509+
DAG.getBitcast(VT, Src1.getOperand(0)),
58510+
DAG.getTargetConstant(Index, DL, MVT::i8));
58511+
}
5850358512
}
5850458513
// Widen extract_subvector
5850558514
// concat(extract_subvector(x,lo), extract_subvector(x,hi))
@@ -59312,6 +59321,45 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5931259321
return DAG.getBitcast(VT, Res);
5931359322
}
5931459323

59324+
// We can always convert per-lane vXf64 shuffles into VSHUFPD.
59325+
if (!IsSplat && NumOps == 2 && VT == MVT::v4f64 &&
59326+
all_of(Ops, [](SDValue Op) {
59327+
return Op.hasOneUse() && (Op.getOpcode() == X86ISD::MOVDDUP ||
59328+
Op.getOpcode() == X86ISD::SHUFP ||
59329+
Op.getOpcode() == X86ISD::VPERMILPI ||
59330+
Op.getOpcode() == X86ISD::BLENDI ||
59331+
Op.getOpcode() == X86ISD::UNPCKL ||
59332+
Op.getOpcode() == X86ISD::UNPCKH);
59333+
})) {
59334+
SmallVector<SDValue, 2> SrcOps0, SrcOps1;
59335+
SmallVector<int, 8> SrcMask0, SrcMask1;
59336+
if (getTargetShuffleMask(Ops[0], /*AllowSentinelZero=*/false, SrcOps0,
59337+
SrcMask0) &&
59338+
getTargetShuffleMask(Ops[1], /*AllowSentinelZero=*/false, SrcOps1,
59339+
SrcMask1)) {
59340+
assert(SrcMask0.size() == 2 && SrcMask1.size() == 2 && "Bad shuffles");
59341+
SDValue LHS[] = {SrcOps0[SrcMask0[0] / 2], SrcOps1[SrcMask1[0] / 2]};
59342+
SDValue RHS[] = {SrcOps0[SrcMask0[1] / 2], SrcOps1[SrcMask1[1] / 2]};
59343+
SDValue Concat0 =
59344+
combineConcatVectorOps(DL, VT, LHS, DAG, Subtarget, Depth + 1);
59345+
SDValue Concat1 =
59346+
combineConcatVectorOps(DL, VT, RHS, DAG, Subtarget, Depth + 1);
59347+
if (Concat0 || Concat1) {
59348+
unsigned SHUFPDMask = 0;
59349+
SHUFPDMask |= (SrcMask0[0] & 1) << 0;
59350+
SHUFPDMask |= (SrcMask0[1] & 1) << 1;
59351+
SHUFPDMask |= (SrcMask1[0] & 1) << 2;
59352+
SHUFPDMask |= (SrcMask1[1] & 1) << 3;
59353+
Concat0 =
59354+
Concat0 ? Concat0 : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS);
59355+
Concat1 =
59356+
Concat1 ? Concat1 : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, RHS);
59357+
return DAG.getNode(X86ISD::SHUFP, DL, VT, Concat0, Concat1,
59358+
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
59359+
}
59360+
}
59361+
}
59362+
5931559363
return SDValue();
5931659364
}
5931759365

0 commit comments

Comments
 (0)