@@ -42675,6 +42675,9 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42675
42675
return SDValue();
42676
42676
}
42677
42677
case X86ISD::VPERMV3: {
42678
+ MVT WideVT = VT.getDoubleNumVectorElementsVT();
42679
+ bool CanConcat = VT.is128BitVector() ||
42680
+ (VT.is256BitVector() && Subtarget.useAVX512Regs());
42678
42681
SmallVector<SDValue, 2> SrcOps;
42679
42682
SmallVector<int, 32> Mask;
42680
42683
if (getTargetShuffleMask(N, /*AllowSentinelZero=*/false, SrcOps, Mask)) {
@@ -42712,12 +42715,25 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42712
42715
return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
42713
42716
N.getOperand(0), Subtarget, DAG);
42714
42717
}
42718
+ // Combine VPERMV3 to widened VPERMV if the two source operands can be
42719
+ // freely concatenated, with a commuted shuffle mask.
42720
+ if (CanConcat) {
42721
+ if (SDValue ConcatSrc = combineConcatVectorOps(
42722
+ DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42723
+ Subtarget)) {
42724
+ ShuffleVectorSDNode::commuteMask(Mask);
42725
+ Mask.append(NumElts, SM_SentinelUndef);
42726
+ SDValue Perm =
42727
+ lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42728
+ DAG.getUNDEF(WideVT), Subtarget, DAG);
42729
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42730
+ DAG.getVectorIdxConstant(0, DL));
42731
+ }
42732
+ }
42715
42733
}
42716
42734
// Combine VPERMV3 to widened VPERMV if the two source operands can be
42717
42735
// freely concatenated.
42718
- MVT WideVT = VT.getDoubleNumVectorElementsVT();
42719
- if (VT.is128BitVector() ||
42720
- (VT.is256BitVector() && Subtarget.useAVX512Regs())) {
42736
+ if (CanConcat) {
42721
42737
SDValue Ops[] = {N.getOperand(0), N.getOperand(2)};
42722
42738
if (SDValue ConcatSrc =
42723
42739
combineConcatVectorOps(DL, WideVT, Ops, DAG, Subtarget)) {
@@ -42727,22 +42743,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42727
42743
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42728
42744
DAG.getVectorIdxConstant(0, DL));
42729
42745
}
42730
- // See if we can concatenate the commuted operands (and then cheaply
42731
- // shuffle them, for constant shuffle masks this should fold away).
42732
- SDValue SwapOps[] = {N.getOperand(2), N.getOperand(0)};
42733
- if (SDValue ConcatSrc =
42734
- combineConcatVectorOps(DL, WideVT, SwapOps, DAG, Subtarget)) {
42735
- SmallVector<int, 16> SwapMask(WideVT.getVectorNumElements());
42736
- std::iota(SwapMask.begin(), SwapMask.begin() + NumElts, NumElts);
42737
- std::iota(SwapMask.begin() + NumElts, SwapMask.end(), 0);
42738
- SDValue Swap = DAG.getVectorShuffle(WideVT, DL, ConcatSrc,
42739
- DAG.getUNDEF(WideVT), SwapMask);
42740
- SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42741
- DL, WideVT.getSizeInBits());
42742
- SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, Swap);
42743
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42744
- DAG.getVectorIdxConstant(0, DL));
42745
- }
42746
42746
}
42747
42747
return SDValue();
42748
42748
}
0 commit comments