@@ -11358,7 +11358,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11358
11358
// Find the cost of inserting/extracting values from the vector.
11359
11359
// Check if the same elements are inserted several times and count them as
11360
11360
// shuffle candidates.
11361
- APInt ShuffledElements = APInt::getZero(VL.size());
11361
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
11362
+ APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
11362
11363
DenseMap<Value *, unsigned> UniqueElements;
11363
11364
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
11364
11365
InstructionCost Cost;
@@ -11378,7 +11379,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11378
11379
Value *V = VL[I];
11379
11380
// No need to shuffle duplicates for constants.
11380
11381
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
11381
- ShuffledElements.setBit(I);
11382
+ for (unsigned J = 0; J != ScalarTyNumElements; ++J)
11383
+ ShuffledElements.setBit(I * ScalarTyNumElements + J);
11382
11384
ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
11383
11385
continue;
11384
11386
}
@@ -11391,7 +11393,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
11391
11393
}
11392
11394
11393
11395
DuplicateNonConst = true;
11394
- ShuffledElements.setBit(I);
11396
+ for (unsigned J = 0; J != ScalarTyNumElements; ++J)
11397
+ ShuffledElements.setBit(I * ScalarTyNumElements + J);
11395
11398
ShuffleMask[I] = Res.first->second;
11396
11399
}
11397
11400
if (ForPoisonSrc)
0 commit comments