@@ -1732,6 +1732,36 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1732
1732
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc, BinResTy,
1733
1733
OldMask, CostKind, 0 , nullptr , {LHS, RHS}, &I);
1734
1734
1735
+ // Handle shuffle(binop(shuffle(x),y),binop(z,shuffle(w))) style patterns
1736
+ // where one use shuffles have gotten split across the binop/cmp. These
1737
+ // often allow a major reduction in total cost that wouldn't happen as
1738
+ // individual folds.
1739
+ auto MergeInner = [&](Value *&Op, int Offset, MutableArrayRef<int > Mask,
1740
+ TTI::TargetCostKind CostKind) -> bool {
1741
+ Value *InnerOp;
1742
+ ArrayRef<int > InnerMask;
1743
+ if (match (Op, m_OneUse (m_Shuffle (m_Value (InnerOp), m_Undef (),
1744
+ m_Mask (InnerMask)))) &&
1745
+ all_of (InnerMask,
1746
+ [NumSrcElts](int M) { return M < (int )NumSrcElts; }) &&
1747
+ InnerOp->getType () == Op->getType ()) {
1748
+ for (int &M : Mask)
1749
+ if (Offset <= M && M < (int )(Offset + NumSrcElts)) {
1750
+ M = InnerMask[M - Offset];
1751
+ M = 0 <= M ? M + Offset : M;
1752
+ }
1753
+ OldCost += TTI.getInstructionCost (cast<Instruction>(Op), CostKind);
1754
+ Op = InnerOp;
1755
+ return true ;
1756
+ }
1757
+ return false ;
1758
+ };
1759
+ bool ReducedInstCount = false ;
1760
+ ReducedInstCount |= MergeInner (X, 0 , NewMask0, CostKind);
1761
+ ReducedInstCount |= MergeInner (Y, 0 , NewMask1, CostKind);
1762
+ ReducedInstCount |= MergeInner (Z, NumSrcElts, NewMask0, CostKind);
1763
+ ReducedInstCount |= MergeInner (W, NumSrcElts, NewMask1, CostKind);
1764
+
1735
1765
InstructionCost NewCost =
1736
1766
TTI.getShuffleCost (SK0, BinOpTy, NewMask0, CostKind, 0 , nullptr , {X, Z}) +
1737
1767
TTI.getShuffleCost (SK1, BinOpTy, NewMask1, CostKind, 0 , nullptr , {Y, W});
@@ -1752,8 +1782,8 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1752
1782
1753
1783
// If either shuffle will constant fold away, then fold for the same cost as
1754
1784
// we will reduce the instruction count.
1755
- bool ReducedInstCount = (isa<Constant>(X) && isa<Constant>(Z)) ||
1756
- (isa<Constant>(Y) && isa<Constant>(W));
1785
+ ReducedInstCount | = (isa<Constant>(X) && isa<Constant>(Z)) ||
1786
+ (isa<Constant>(Y) && isa<Constant>(W));
1757
1787
if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
1758
1788
return false ;
1759
1789
0 commit comments