@@ -42673,40 +42673,13 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42673
42673
return SDValue();
42674
42674
}
42675
42675
case X86ISD::VPERMV3: {
42676
- // Combine VPERMV3 to widened VPERMV if the two source operands can be
42677
- // freely concatenated.
42678
42676
MVT WideVT = VT.getDoubleNumVectorElementsVT();
42679
42677
bool CanConcat = VT.is128BitVector() ||
42680
42678
(VT.is256BitVector() && Subtarget.useAVX512Regs());
42681
- if (CanConcat) {
42682
- SDValue Ops[] = {N.getOperand(0), N.getOperand(2)};
42683
- if (SDValue ConcatSrc =
42684
- combineConcatVectorOps(DL, WideVT, Ops, DAG, Subtarget)) {
42685
- SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42686
- DL, WideVT.getSizeInBits());
42687
- SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, ConcatSrc);
42688
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42689
- DAG.getVectorIdxConstant(0, DL));
42690
- }
42691
- }
42692
42679
SmallVector<SDValue, 2> SrcOps;
42693
42680
SmallVector<int, 32> Mask;
42694
42681
if (getTargetShuffleMask(N, /*AllowSentinelZero=*/false, SrcOps, Mask)) {
42695
42682
assert(Mask.size() == NumElts && "Unexpected shuffle mask size");
42696
- // See if we can concatenate the commuted operands.
42697
- if (CanConcat) {
42698
- if (SDValue ConcatSrc = combineConcatVectorOps(
42699
- DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42700
- Subtarget)) {
42701
- ShuffleVectorSDNode::commuteMask(Mask);
42702
- Mask.append(NumElts, SM_SentinelUndef);
42703
- SDValue Perm =
42704
- lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42705
- DAG.getUNDEF(WideVT), Subtarget, DAG);
42706
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42707
- DAG.getVectorIdxConstant(0, DL));
42708
- }
42709
- }
42710
42683
SDValue V1 = peekThroughBitcasts(N.getOperand(0));
42711
42684
SDValue V2 = peekThroughBitcasts(N.getOperand(2));
42712
42685
// Canonicalize to VPERMV if both sources are the same.
@@ -42740,6 +42713,33 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42740
42713
return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
42741
42714
N.getOperand(0), Subtarget, DAG);
42742
42715
}
42716
+ // Combine VPERMV3 to widened VPERMV if the two source operands can be
42717
+ // freely concatenated, with a commuted shuffle mask.
42718
+ if (CanConcat) {
42719
+ if (SDValue ConcatSrc = combineConcatVectorOps(
42720
+ DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG,
42721
+ Subtarget)) {
42722
+ ShuffleVectorSDNode::commuteMask(Mask);
42723
+ Mask.append(NumElts, SM_SentinelUndef);
42724
+ SDValue Perm =
42725
+ lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42726
+ DAG.getUNDEF(WideVT), Subtarget, DAG);
42727
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42728
+ DAG.getVectorIdxConstant(0, DL));
42729
+ }
42730
+ }
42731
+ }
42732
+ // Combine VPERMV3 to widened VPERMV if the two source operands can be
42733
+ // freely concatenated.
42734
+ if (CanConcat) {
42735
+ if (SDValue ConcatSrc = combineConcatVectorOps(
42736
+ DL, WideVT, {N.getOperand(0), N.getOperand(2)}, DAG, Subtarget)) {
42737
+ SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
42738
+ DL, WideVT.getSizeInBits());
42739
+ SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask, ConcatSrc);
42740
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
42741
+ DAG.getVectorIdxConstant(0, DL));
42742
+ }
42743
42743
}
42744
42744
return SDValue();
42745
42745
}
0 commit comments