@@ -1153,8 +1153,8 @@ static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
1153
1153
1154
1154
/// \returns a bitset for selecting opcodes. false for Opcode0 and true for
1155
1155
/// Opcode1.
1156
- SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
1157
- unsigned Opcode1) {
1156
+ static SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
1157
+ unsigned Opcode1) {
1158
1158
Type *ScalarTy = VL[0]->getType();
1159
1159
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
1160
1160
SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false);
@@ -1371,7 +1371,7 @@ class BoUpSLP {
1371
1371
MustGather.clear();
1372
1372
NonScheduledFirst.clear();
1373
1373
EntryToLastInstruction.clear();
1374
- GatheredLoadsEntriesFirst = NoGatheredLoads ;
1374
+ GatheredLoadsEntriesFirst.reset() ;
1375
1375
ExternalUses.clear();
1376
1376
ExternalUsesAsOriginalScalar.clear();
1377
1377
for (auto &Iter : BlocksSchedules) {
@@ -3193,7 +3193,7 @@ class BoUpSLP {
3193
3193
SmallVector<EdgeInfo, 1> UserTreeIndices;
3194
3194
3195
3195
/// The index of this treeEntry in VectorizableTree.
3196
- int Idx = -1 ;
3196
+ unsigned Idx = 0 ;
3197
3197
3198
3198
/// For gather/buildvector/alt opcode (TODO) nodes, which are combined from
3199
3199
/// other nodes as a series of insertvector instructions.
@@ -3461,7 +3461,7 @@ class BoUpSLP {
3461
3461
(Bundle && EntryState != TreeEntry::NeedToGather)) &&
3462
3462
"Need to vectorize gather entry?");
3463
3463
// Gathered loads still gathered? Do not create entry, use the original one.
3464
- if (GatheredLoadsEntriesFirst != NoGatheredLoads &&
3464
+ if (GatheredLoadsEntriesFirst.has_value() &&
3465
3465
EntryState == TreeEntry::NeedToGather &&
3466
3466
S.getOpcode() == Instruction::Load && UserTreeIdx.EdgeIdx == UINT_MAX &&
3467
3467
!UserTreeIdx.UserTE)
@@ -3614,8 +3614,7 @@ class BoUpSLP {
3614
3614
ValueToGatherNodesMap ValueToGatherNodes;
3615
3615
3616
3616
/// The index of the first gathered load entry in the VectorizableTree.
3617
- constexpr static int NoGatheredLoads = -1;
3618
- int GatheredLoadsEntriesFirst = NoGatheredLoads;
3617
+ std::optional<unsigned> GatheredLoadsEntriesFirst;
3619
3618
3620
3619
/// This POD struct describes one external user in the vectorized tree.
3621
3620
struct ExternalUser {
@@ -6971,9 +6970,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
6971
6970
}
6972
6971
// If no new entries were created, consider that no gathered loads entries must be
6973
6972
// handled.
6974
- if (static_cast<unsigned>(GatheredLoadsEntriesFirst) ==
6973
+ if (static_cast<unsigned>(* GatheredLoadsEntriesFirst) ==
6975
6974
VectorizableTree.size())
6976
- GatheredLoadsEntriesFirst = NoGatheredLoads ;
6975
+ GatheredLoadsEntriesFirst.reset() ;
6977
6976
}
6978
6977
6979
6978
/// \return true if the specified list of values has only one instruction that
@@ -7704,7 +7703,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
7704
7703
if (S.getOpcode()) {
7705
7704
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
7706
7705
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
7707
- if (GatheredLoadsEntriesFirst != NoGatheredLoads || !E->isSame(VL)) {
7706
+ if (GatheredLoadsEntriesFirst.has_value() || !E->isSame(VL)) {
7708
7707
auto It = MultiNodeScalars.find(S.OpValue);
7709
7708
if (It != MultiNodeScalars.end()) {
7710
7709
auto *TEIt = find_if(It->getSecond(),
@@ -11094,9 +11093,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
11094
11093
"Not supported shufflevector usage.");
11095
11094
auto *SV = cast<ShuffleVectorInst>(V);
11096
11095
int Index;
11097
- [[maybe_unused]] bool isExtractSubvectorMask =
11096
+ [[maybe_unused]] bool IsExtractSubvectorMask =
11098
11097
SV->isExtractSubvectorMask(Index);
11099
- assert(isExtractSubvectorMask &&
11098
+ assert(IsExtractSubvectorMask &&
11100
11099
"Not supported shufflevector usage.");
11101
11100
if (NextIndex != Index)
11102
11101
return false;
@@ -11822,8 +11821,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
11822
11821
KeepScalar = true;
11823
11822
} else if (KeepScalar && ScalarCost != TTI::TCC_Free &&
11824
11823
ExtraCost - ScalarCost <= TTI::TCC_Basic &&
11825
- (GatheredLoadsEntriesFirst == NoGatheredLoads ||
11826
- Entry->Idx < GatheredLoadsEntriesFirst)) {
11824
+ (! GatheredLoadsEntriesFirst.has_value() ||
11825
+ Entry->Idx < * GatheredLoadsEntriesFirst)) {
11827
11826
unsigned ScalarUsesCount = count_if(Entry->Scalars, [&](Value *V) {
11828
11827
return ValueToExtUses->contains(V);
11829
11828
});
@@ -12281,7 +12280,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
12281
12280
VToTEs.insert(TEPtr);
12282
12281
}
12283
12282
if (const TreeEntry *VTE = getTreeEntry(V)) {
12284
- if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst) {
12283
+ if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst.value_or(0) ) {
12285
12284
if (VTE->State != TreeEntry::Vectorize) {
12286
12285
auto It = MultiNodeScalars.find(V);
12287
12286
if (It == MultiNodeScalars.end())
@@ -12560,7 +12559,7 @@ BoUpSLP::isGatherShuffledEntry(
12560
12559
Entries.clear();
12561
12560
// No need to check for the topmost gather node.
12562
12561
if (TE == VectorizableTree.front().get() &&
12563
- (GatheredLoadsEntriesFirst == NoGatheredLoads ||
12562
+ (! GatheredLoadsEntriesFirst.has_value() ||
12564
12563
none_of(ArrayRef(VectorizableTree).drop_front(),
12565
12564
[](const std::unique_ptr<TreeEntry> &TE) {
12566
12565
return !TE->isGather();
@@ -12712,9 +12711,9 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
12712
12711
// constant indices or gathered loads).
12713
12712
auto *Front = E->getMainOp();
12714
12713
auto *BB = Front->getParent();
12715
- assert(((GatheredLoadsEntriesFirst != NoGatheredLoads &&
12714
+ assert(((GatheredLoadsEntriesFirst.has_value() &&
12716
12715
E->getOpcode() == Instruction::Load && E->isGather() &&
12717
- E->Idx < GatheredLoadsEntriesFirst) ||
12716
+ E->Idx < * GatheredLoadsEntriesFirst) ||
12718
12717
all_of(E->Scalars,
12719
12718
[=](Value *V) -> bool {
12720
12719
if (E->getOpcode() == Instruction::GetElementPtr &&
@@ -12742,9 +12741,9 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
12742
12741
!isa<GetElementPtrInst>(I)) ||
12743
12742
(isVectorLikeInstWithConstOps(LastInst) &&
12744
12743
isVectorLikeInstWithConstOps(I)) ||
12745
- (GatheredLoadsEntriesFirst != NoGatheredLoads &&
12744
+ (GatheredLoadsEntriesFirst.has_value() &&
12746
12745
E->getOpcode() == Instruction::Load && E->isGather() &&
12747
- E->Idx < GatheredLoadsEntriesFirst)) &&
12746
+ E->Idx < * GatheredLoadsEntriesFirst)) &&
12748
12747
"Expected vector-like or non-GEP in GEP node insts only.");
12749
12748
if (!DT->isReachableFromEntry(LastInst->getParent())) {
12750
12749
LastInst = I;
@@ -12802,8 +12801,8 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
12802
12801
};
12803
12802
12804
12803
// Set insertpoint for gathered loads to the very first load.
12805
- if (GatheredLoadsEntriesFirst != NoGatheredLoads &&
12806
- E->Idx >= GatheredLoadsEntriesFirst && !E->isGather() &&
12804
+ if (GatheredLoadsEntriesFirst.has_value() &&
12805
+ E->Idx >= * GatheredLoadsEntriesFirst && !E->isGather() &&
12807
12806
E->getOpcode() == Instruction::Load) {
12808
12807
Res = FindFirstInst();
12809
12808
return *Res;
@@ -15181,8 +15180,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
15181
15180
// Emit gathered loads first to emit better code for the users of those
15182
15181
// gathered loads.
15183
15182
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
15184
- if (GatheredLoadsEntriesFirst != NoGatheredLoads &&
15185
- TE->Idx >= GatheredLoadsEntriesFirst &&
15183
+ if (GatheredLoadsEntriesFirst.has_value() &&
15184
+ TE->Idx >= * GatheredLoadsEntriesFirst &&
15186
15185
(!TE->isGather() || !TE->UserTreeIndices.empty())) {
15187
15186
assert((!TE->UserTreeIndices.empty() ||
15188
15187
(TE->getOpcode() == Instruction::Load && !TE->isGather())) &&
@@ -15754,8 +15753,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
15754
15753
return EI.UserTE == VectorizableTree.front().get() &&
15755
15754
EI.EdgeIdx == UINT_MAX;
15756
15755
})) &&
15757
- !(GatheredLoadsEntriesFirst != NoGatheredLoads &&
15758
- IE->Idx >= GatheredLoadsEntriesFirst &&
15756
+ !(GatheredLoadsEntriesFirst.has_value() &&
15757
+ IE->Idx >= * GatheredLoadsEntriesFirst &&
15759
15758
VectorizableTree.front()->isGather() &&
15760
15759
is_contained(VectorizableTree.front()->Scalars, I)))
15761
15760
continue;
@@ -16969,8 +16968,7 @@ void BoUpSLP::computeMinimumValueSizes() {
16969
16968
(NodeIdx == 0 && !VectorizableTree[NodeIdx]->UserTreeIndices.empty()) ||
16970
16969
(NodeIdx != 0 && any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
16971
16970
[NodeIdx](const EdgeInfo &EI) {
16972
- return EI.UserTE->Idx >
16973
- static_cast<int>(NodeIdx);
16971
+ return EI.UserTE->Idx > NodeIdx;
16974
16972
})))
16975
16973
return;
16976
16974
0 commit comments