@@ -15347,13 +15347,14 @@ BoUpSLP::isGatherShuffledEntry(
15347
15347
15348
15348
InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
15349
15349
Type *ScalarTy) const {
15350
- auto *VecTy = getWidenedType(ScalarTy, VL.size());
15350
+ const unsigned VF = VL.size();
15351
+ auto *VecTy = getWidenedType(ScalarTy, VF);
15351
15352
bool DuplicateNonConst = false;
15352
15353
// Find the cost of inserting/extracting values from the vector.
15353
15354
// Check if the same elements are inserted several times and count them as
15354
15355
// shuffle candidates.
15355
- APInt ShuffledElements = APInt::getZero(VL.size() );
15356
- APInt DemandedElements = APInt::getZero(VL.size() );
15356
+ APInt ShuffledElements = APInt::getZero(VF );
15357
+ APInt DemandedElements = APInt::getZero(VF );
15357
15358
DenseMap<Value *, unsigned> UniqueElements;
15358
15359
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
15359
15360
InstructionCost Cost;
@@ -15363,11 +15364,10 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
15363
15364
Cost += TTI->getCastInstrCost(Instruction::Trunc, ScalarTy, V->getType(),
15364
15365
TTI::CastContextHint::None, CostKind);
15365
15366
};
15366
- SmallVector<int> ShuffleMask(VL.size() , PoisonMaskElem);
15367
- SmallVector<int> ConstantShuffleMask(VL.size() , PoisonMaskElem);
15367
+ SmallVector<int> ShuffleMask(VF , PoisonMaskElem);
15368
+ SmallVector<int> ConstantShuffleMask(VF , PoisonMaskElem);
15368
15369
std::iota(ConstantShuffleMask.begin(), ConstantShuffleMask.end(), 0);
15369
- for (unsigned I = 0, E = VL.size(); I < E; ++I) {
15370
- Value *V = VL[I];
15370
+ for (auto [I, V] : enumerate(VL)) {
15371
15371
// No need to shuffle duplicates for constants.
15372
15372
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
15373
15373
ShuffledElements.setBit(I);
@@ -15376,7 +15376,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
15376
15376
}
15377
15377
15378
15378
if (isConstant(V)) {
15379
- ConstantShuffleMask[I] = I + E ;
15379
+ ConstantShuffleMask[I] = I + VF ;
15380
15380
ShuffleMask[I] = I;
15381
15381
continue;
15382
15382
}
@@ -15398,12 +15398,15 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
15398
15398
if (!ForPoisonSrc && IsAnyNonUndefConst) {
15399
15399
Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteTwoSrc, VecTy,
15400
15400
ConstantShuffleMask);
15401
- for (auto [Idx, I] : enumerate(ShuffleMask)) {
15402
- if (I == PoisonMaskElem)
15403
- I = Idx;
15404
- else
15405
- I += VL.size();
15406
- }
15401
+ // Update the shuffle mask for shuffling with incoming source (all elements
15402
+ // are used!) or with constant subvector.
15403
+ for_each(enumerate(ShuffleMask), [&](auto P) {
15404
+ if ((!ForPoisonSrc && P.value() == PoisonMaskElem) ||
15405
+ ConstantShuffleMask[P.index()] != PoisonMaskElem)
15406
+ P.value() = P.index();
15407
+ else if (P.value() != PoisonMaskElem)
15408
+ P.value() += VF;
15409
+ });
15407
15410
}
15408
15411
15409
15412
// 2. Insert unique non-constants.
@@ -15415,7 +15418,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
15415
15418
// 3. Shuffle duplicates.
15416
15419
if (DuplicateNonConst)
15417
15420
Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
15418
- VecTy, ShuffleMask);
15421
+ VecTy, ShuffleMask, CostKind );
15419
15422
return Cost;
15420
15423
}
15421
15424
0 commit comments