Skip to content

Commit 6ec66a2

Browse files
committed
[X86] Move VPERMV3(X,M,Y) -> VPERMV(M,CONCAT(X,Y)) fold after general VPERMV3 canonicalization
Pulled out of #133923. This prevents regressions where SimplifyDemandedVectorEltsForTargetNode exposes VPERMV3(X,M,X) repeated-operand patterns: these were being concatenated into wider VPERMV nodes before the simpler canonicalizations had a chance to clean them up.
1 parent 7baa7ed commit 6ec66a2

File tree

1 file changed

+27
-27
lines changed

1 file changed

+27
-27
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -42673,40 +42673,13 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4267342673
return SDValue();
4267442674
}
4267542675
case X86ISD::VPERMV3: {
42676-
// Combine VPERMV3 to widened VPERMV if the two source operands can be
42677-
// freely concatenated.
4267842676
MVT WideVT = VT.getDoubleNumVectorElementsVT();
4267942677
bool CanConcat = VT.is128BitVector() ||
4268042678
(VT.is256BitVector() && Subtarget.useAVX512Regs());
42681-
if (CanConcat) {
42682-
SDValue Ops[] = {N.getOperand(0), N.getOperand(2)};
42683-
if (SDValue ConcatSrc =
42684-
combineConcatVectorOps(DL, WideVT, Ops, DAG, Subtarget)) {
42685-
SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42686-
DL, WideVT.getSizeInBits());
42687-
SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, ConcatSrc);
42688-
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42689-
DAG.getVectorIdxConstant(0, DL));
42690-
}
42691-
}
4269242679
SmallVector<SDValue, 2> SrcOps;
4269342680
SmallVector<int, 32> Mask;
4269442681
if (getTargetShuffleMask(N, /*AllowSentinelZero=*/false, SrcOps, Mask)) {
4269542682
assert(Mask.size() == NumElts && "Unexpected shuffle mask size");
42696-
// See if we can concatenate the commuted operands.
42697-
if (CanConcat) {
42698-
if (SDValue ConcatSrc = combineConcatVectorOps(
42699-
DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42700-
Subtarget)) {
42701-
ShuffleVectorSDNode::commuteMask(Mask);
42702-
Mask.append(NumElts, SM_SentinelUndef);
42703-
SDValue Perm =
42704-
lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42705-
DAG.getUNDEF(WideVT), Subtarget, DAG);
42706-
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42707-
DAG.getVectorIdxConstant(0, DL));
42708-
}
42709-
}
4271042683
SDValue V1 = peekThroughBitcasts(N.getOperand(0));
4271142684
SDValue V2 = peekThroughBitcasts(N.getOperand(2));
4271242685
// Canonicalize to VPERMV if both sources are the same.
@@ -42740,6 +42713,33 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4274042713
return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
4274142714
N.getOperand(0), Subtarget, DAG);
4274242715
}
42716+
// Combine VPERMV3 to widened VPERMV if the two source operands can be
42717+
// freely concatenated, with a commuted shuffle mask.
42718+
if (CanConcat) {
42719+
if (SDValue ConcatSrc = combineConcatVectorOps(
42720+
DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42721+
Subtarget)) {
42722+
ShuffleVectorSDNode::commuteMask(Mask);
42723+
Mask.append(NumElts, SM_SentinelUndef);
42724+
SDValue Perm =
42725+
lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42726+
DAG.getUNDEF(WideVT), Subtarget, DAG);
42727+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42728+
DAG.getVectorIdxConstant(0, DL));
42729+
}
42730+
}
42731+
}
42732+
// Combine VPERMV3 to widened VPERMV if the two source operands can be
42733+
// freely concatenated.
42734+
if (CanConcat) {
42735+
if (SDValue ConcatSrc = combineConcatVectorOps(
42736+
DL, WideVT, {N.getOperand(0), N.getOperand(2)}, DAG, Subtarget)) {
42737+
SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42738+
DL, WideVT.getSizeInBits());
42739+
SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, ConcatSrc);
42740+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42741+
DAG.getVectorIdxConstant(0, DL));
42742+
}
4274342743
}
4274442744
return SDValue();
4274542745
}

0 commit comments

Comments
 (0)