@@ -408,7 +408,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) {
 /// total number of elements \p Size and number of registers (parts) \p
 /// NumParts.
 static unsigned getPartNumElems(unsigned Size, unsigned NumParts) {
-  return std::min<unsigned>(Size, PowerOf2Ceil(divideCeil(Size, NumParts)));
+  return PowerOf2Ceil(divideCeil(Size, NumParts));
 }

 /// Returns correct remaining number of elements, considering total amount \p
@@ -7021,11 +7021,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         UniqueValues.emplace_back(V);
       }
       size_t NumUniqueScalarValues = UniqueValues.size();
-      bool IsFullVectors =
-          hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
-                             NumUniqueScalarValues);
-      if (NumUniqueScalarValues == VL.size() &&
-          (VectorizeNonPowerOf2 || IsFullVectors)) {
+      if (NumUniqueScalarValues == VL.size()) {
         ReuseShuffleIndices.clear();
       } else {
         // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
@@ -7036,10 +7032,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           return false;
         }
         LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
-        if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
-            (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
-               return isa<UndefValue>(V) || !isConstant(V);
-             }))) {
+        if (NumUniqueScalarValues <= 1 ||
+            (UniquePositions.size() == 1 && all_of(UniqueValues,
+                                                   [](Value *V) {
+                                                     return isa<UndefValue>(V) ||
+                                                            !isConstant(V);
+                                                   })) ||
+            !hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
+                                NumUniqueScalarValues)) {
           if (DoNotFail && UniquePositions.size() > 1 &&
               NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
               all_of(UniqueValues, [=](Value *V) {
@@ -9143,6 +9143,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       return nullptr;
     Value *VecBase = nullptr;
     ArrayRef<Value *> VL = E->Scalars;
+    // If the resulting type is scalarized, do not adjust the cost.
+    if (NumParts == VL.size())
+      return nullptr;
     // Check if it can be considered reused if same extractelements were
     // vectorized already.
     bool PrevNodeFound = any_of(
@@ -9795,7 +9798,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         InsertMask[Idx] = I + 1;
       }
       unsigned VecScalarsSz = PowerOf2Ceil(NumElts);
-      if (NumOfParts > 0 && NumOfParts < NumElts)
+      if (NumOfParts > 0)
         VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
       unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) *
                        VecScalarsSz;
0 commit comments