@@ -11732,8 +11732,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11732
11732
// Find the cost of inserting/extracting values from the vector.
11733
11733
// Check if the same elements are inserted several times and count them as
11734
11734
// shuffle candidates.
11735
- unsigned ScalarTyNumElements = getNumElements(ScalarTy);
11736
- APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
11735
+ APInt ShuffledElements = APInt::getZero(VL.size());
11737
11736
DenseMap<Value *, unsigned> UniqueElements;
11738
11737
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
11739
11738
InstructionCost Cost;
@@ -11753,8 +11752,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11753
11752
Value *V = VL[I];
11754
11753
// No need to shuffle duplicates for constants.
11755
11754
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
11756
- ShuffledElements.setBits(I * ScalarTyNumElements,
11757
- I * ScalarTyNumElements + ScalarTyNumElements);
11755
+ ShuffledElements.setBit(I);
11758
11756
ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
11759
11757
continue;
11760
11758
}
@@ -11767,14 +11765,27 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11767
11765
}
11768
11766
11769
11767
DuplicateNonConst = true;
11770
- ShuffledElements.setBits(I * ScalarTyNumElements,
11771
- I * ScalarTyNumElements + ScalarTyNumElements);
11768
+ ShuffledElements.setBit(I);
11772
11769
ShuffleMask[I] = Res.first->second;
11773
11770
}
11774
- if (ForPoisonSrc)
11775
- Cost =
11776
- TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
11777
- /*Extract*/ false, CostKind);
11771
+ if (ForPoisonSrc) {
11772
+ if (isa<FixedVectorType>(ScalarTy)) {
11773
+ assert(SLPReVec && "Only supported by REVEC.");
11774
+ // We don't need to insert elements one by one. Instead, we can insert the
11775
+ // entire vector into the destination.
11776
+ Cost = 0;
11777
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
11778
+ for (unsigned I = 0, E = VL.size(); I != E; ++I)
11779
+ if (!ShuffledElements[I])
11780
+ Cost += TTI->getShuffleCost(
11781
+ TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
11782
+ I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
11783
+ } else {
11784
+ Cost = TTI->getScalarizationOverhead(VecTy, ~ShuffledElements,
11785
+ /*Insert*/ true,
11786
+ /*Extract*/ false, CostKind);
11787
+ }
11788
+ }
11778
11789
if (DuplicateNonConst)
11779
11790
Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
11780
11791
VecTy, ShuffleMask);
0 commit comments