@@ -1721,11 +1721,11 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
1721
1721
Value *V0, *V1;
1722
1722
UndefValue *U0, *U1;
1723
1723
ArrayRef<int > OuterMask, InnerMask0, InnerMask1;
1724
- if (!match (&I, m_Shuffle ( m_OneUse ( m_Shuffle ( m_Value (V0), m_UndefValue (U0),
1725
- m_Mask (InnerMask0))),
1726
- m_OneUse ( m_Shuffle (m_Value (V1 ), m_UndefValue (U1 ),
1727
- m_Mask (InnerMask1) )),
1728
- m_Mask (OuterMask))))
1724
+ if (!match (&I,
1725
+ m_Shuffle (
1726
+ m_Shuffle (m_Value (V0 ), m_UndefValue (U0), m_Mask (InnerMask0) ),
1727
+ m_Shuffle ( m_Value (V1), m_UndefValue (U1), m_Mask (InnerMask1)),
1728
+ m_Mask (OuterMask))))
1729
1729
return false ;
1730
1730
1731
1731
auto *ShufI0 = dyn_cast<Instruction>(I.getOperand (0 ));
@@ -1769,17 +1769,24 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
1769
1769
// Try to merge the shuffles if the new shuffle is not costly.
1770
1770
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1771
1771
1772
- InstructionCost OldCost =
1772
+ InstructionCost InnerCost0 =
1773
1773
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
1774
- InnerMask0, CostKind, 0 , nullptr , {V0, U0}, ShufI0) +
1774
+ InnerMask0, CostKind, 0 , nullptr , {V0, U0}, ShufI0);
1775
+ InstructionCost InnerCost1 =
1775
1776
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
1776
- InnerMask1, CostKind, 0 , nullptr , {V1, U1}, ShufI1) +
1777
+ InnerMask1, CostKind, 0 , nullptr , {V1, U1}, ShufI1);
1778
+ InstructionCost OuterCost =
1777
1779
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy,
1778
1780
OuterMask, CostKind, 0 , nullptr , {ShufI0, ShufI1}, &I);
1781
+ InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;
1779
1782
1780
1783
InstructionCost NewCost =
1781
1784
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc, ShuffleSrcTy,
1782
1785
NewMask, CostKind, 0 , nullptr , {V0, V1});
1786
+ if (!ShufI0->hasOneUse ())
1787
+ NewCost += InnerCost0;
1788
+ if (!ShufI1->hasOneUse ())
1789
+ NewCost += InnerCost1;
1783
1790
1784
1791
LLVM_DEBUG (dbgs () << " Found a shuffle feeding two shuffles: " << I
1785
1792
<< " \n OldCost: " << OldCost << " vs NewCost: " << NewCost
0 commit comments