Skip to content

Commit 30ebcf6

Browse files
[SLP][NFC]Store operand entries in the map
Instead of searching the whole vectorizable tree to find the operand entry, store it in a separate map and perform a quick lookup based on the user tree entry and the operand index. This removes a lot of duplicated code, simplifies processing, and fixes potential future issues with the analysis being affected by the codegen. It also improves compile time. Reviewers: HanKuanChen, RKSimon, hiraditya Reviewed By: hiraditya Pull Request: #140549
1 parent 0a71879 commit 30ebcf6

File tree

1 file changed

+49
-235
lines changed

1 file changed

+49
-235
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 49 additions & 235 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,6 +1886,7 @@ class BoUpSLP {
18861886
void deleteTree() {
18871887
VectorizableTree.clear();
18881888
ScalarToTreeEntries.clear();
1889+
OperandsToTreeEntry.clear();
18891890
ScalarsInSplitNodes.clear();
18901891
MustGather.clear();
18911892
NonScheduledFirst.clear();
@@ -3401,54 +3402,23 @@ class BoUpSLP {
34013402
const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
34023403
bool &IsProfitableToDemote, bool IsTruncRoot) const;
34033404

3404-
/// Check if the operands on the edges \p Edges of the \p UserTE allows
3405-
/// reordering (i.e. the operands can be reordered because they have only one
3406-
/// user and reordarable).
3405+
/// Builds the list of reorderable operands on the edges \p Edges of the \p
3406+
/// UserTE, which allow reordering (i.e. the operands can be reordered because
3407+
/// they have only one user and are reorderable).
34073408
/// \param ReorderableGathers List of all gather nodes that require reordering
34083409
/// (e.g., gather of extractelements or partially vectorizable loads).
34093410
/// \param GatherOps List of gather operand nodes for \p UserTE that require
34103411
/// reordering, subset of \p NonVectorized.
3411-
bool
3412-
canReorderOperands(TreeEntry *UserTE,
3413-
SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3414-
ArrayRef<TreeEntry *> ReorderableGathers,
3415-
SmallVectorImpl<TreeEntry *> &GatherOps);
3412+
void buildReorderableOperands(
3413+
TreeEntry *UserTE,
3414+
SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3415+
const SmallPtrSetImpl<const TreeEntry *> &ReorderableGathers,
3416+
SmallVectorImpl<TreeEntry *> &GatherOps);
34163417

34173418
/// Checks if the given \p TE is a gather node with clustered reused scalars
34183419
/// and reorders it per given \p Mask.
34193420
void reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const;
34203421

3421-
/// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3422-
/// if any. If it is not vectorized (gather node), returns nullptr.
3423-
TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
3424-
ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
3425-
TreeEntry *TE = nullptr;
3426-
const auto *It = find_if(VL, [&](Value *V) {
3427-
if (!isa<Instruction>(V))
3428-
return false;
3429-
for (TreeEntry *E : getTreeEntries(V)) {
3430-
if (E->UserTreeIndex == EdgeInfo(UserTE, OpIdx)) {
3431-
TE = E;
3432-
return true;
3433-
}
3434-
}
3435-
return false;
3436-
});
3437-
if (It != VL.end()) {
3438-
assert(TE->isSame(VL) && "Expected same scalars.");
3439-
return TE;
3440-
}
3441-
return nullptr;
3442-
}
3443-
3444-
/// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3445-
/// if any. If it is not vectorized (gather node), returns nullptr.
3446-
const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
3447-
unsigned OpIdx) const {
3448-
return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
3449-
const_cast<TreeEntry *>(UserTE), OpIdx);
3450-
}
3451-
34523422
/// Checks if all users of \p I are the part of the vectorization tree.
34533423
bool areAllUsersVectorized(
34543424
Instruction *I,
@@ -3509,19 +3479,6 @@ class BoUpSLP {
35093479
/// Vectorize a single entry in the tree.
35103480
Value *vectorizeTree(TreeEntry *E);
35113481

3512-
/// Returns vectorized operand node, that matches the order of the scalars
3513-
/// operand number \p NodeIdx in entry \p E.
3514-
TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3515-
ArrayRef<Value *> VL,
3516-
const InstructionsState &S);
3517-
const TreeEntry *
3518-
getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3519-
ArrayRef<Value *> VL,
3520-
const InstructionsState &S) const {
3521-
return const_cast<BoUpSLP *>(this)->getMatchedVectorizedOperand(E, NodeIdx,
3522-
VL, S);
3523-
}
3524-
35253482
/// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
35263483
/// \p E.
35273484
Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
@@ -3715,11 +3672,6 @@ class BoUpSLP {
37153672
return IsSame(Scalars, ReuseShuffleIndices);
37163673
}
37173674

3718-
bool isOperandGatherNode(const EdgeInfo &UserEI) const {
3719-
return isGather() && UserTreeIndex.EdgeIdx == UserEI.EdgeIdx &&
3720-
UserTreeIndex.UserTE == UserEI.UserTE;
3721-
}
3722-
37233675
/// \returns true if current entry has same operands as \p TE.
37243676
bool hasEqualOperands(const TreeEntry &TE) const {
37253677
if (TE.getNumOperands() != getNumOperands())
@@ -4107,6 +4059,9 @@ class BoUpSLP {
41074059
TreeEntry *Last = VectorizableTree.back().get();
41084060
Last->Idx = VectorizableTree.size() - 1;
41094061
Last->State = EntryState;
4062+
if (UserTreeIdx.UserTE)
4063+
OperandsToTreeEntry.try_emplace(
4064+
std::make_pair(UserTreeIdx.UserTE, UserTreeIdx.EdgeIdx), Last);
41104065
// FIXME: Remove once support for ReuseShuffleIndices has been implemented
41114066
// for non-power-of-two vectors.
41124067
assert(
@@ -4298,6 +4253,10 @@ class BoUpSLP {
42984253
/// Maps a specific scalar to its tree entry(ies).
42994254
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
43004255

4256+
/// Maps a (user tree entry, operand index) pair to the tree entry of that operand.
4257+
SmallDenseMap<std::pair<const TreeEntry *, unsigned>, TreeEntry *>
4258+
OperandsToTreeEntry;
4259+
43014260
/// Scalars, used in split vectorize nodes.
43024261
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarsInSplitNodes;
43034262

@@ -7411,11 +7370,11 @@ void BoUpSLP::reorderTopToBottom() {
74117370
}
74127371
}
74137372

7414-
bool BoUpSLP::canReorderOperands(
7373+
void BoUpSLP::buildReorderableOperands(
74157374
TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
7416-
ArrayRef<TreeEntry *> ReorderableGathers,
7375+
const SmallPtrSetImpl<const TreeEntry *> &ReorderableGathers,
74177376
SmallVectorImpl<TreeEntry *> &GatherOps) {
7418-
for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
7377+
for (unsigned I : seq<unsigned>(UserTE->getNumOperands())) {
74197378
if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
74207379
return OpData.first == I &&
74217380
(OpData.second->State == TreeEntry::Vectorize ||
@@ -7424,7 +7383,25 @@ bool BoUpSLP::canReorderOperands(
74247383
OpData.second->State == TreeEntry::SplitVectorize);
74257384
}))
74267385
continue;
7427-
if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
7386+
// Do not request operands, if they do not exist.
7387+
if (UserTE->hasState()) {
7388+
if (UserTE->getOpcode() == Instruction::ExtractElement ||
7389+
UserTE->getOpcode() == Instruction::ExtractValue)
7390+
continue;
7391+
if (UserTE->getOpcode() == Instruction::InsertElement && I == 0)
7392+
continue;
7393+
if (UserTE->getOpcode() == Instruction::Store &&
7394+
UserTE->State == TreeEntry::Vectorize && I == 1)
7395+
continue;
7396+
if (UserTE->getOpcode() == Instruction::Load &&
7397+
(UserTE->State == TreeEntry::Vectorize ||
7398+
UserTE->State == TreeEntry::StridedVectorize ||
7399+
UserTE->State == TreeEntry::CompressVectorize))
7400+
continue;
7401+
}
7402+
TreeEntry *TE = getOperandEntry(UserTE, I);
7403+
assert(TE && "Expected operand entry.");
7404+
if (!TE->isGather()) {
74287405
// Add the node to the list of the ordered nodes with the identity
74297406
// order.
74307407
Edges.emplace_back(I, TE);
@@ -7433,37 +7410,14 @@ bool BoUpSLP::canReorderOperands(
74337410
// simply add to the list of gathered ops.
74347411
// If there are reused scalars, process this node as a regular vectorize
74357412
// node, just reorder reuses mask.
7436-
if (TE->State != TreeEntry::Vectorize &&
7437-
TE->State != TreeEntry::StridedVectorize &&
7438-
TE->State != TreeEntry::CompressVectorize &&
7439-
TE->State != TreeEntry::SplitVectorize &&
7413+
if (TE->State == TreeEntry::ScatterVectorize &&
74407414
TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
74417415
GatherOps.push_back(TE);
74427416
continue;
74437417
}
7444-
TreeEntry *Gather = nullptr;
7445-
if (count_if(ReorderableGathers,
7446-
[&Gather, UserTE, I](TreeEntry *TE) {
7447-
assert(TE->State != TreeEntry::Vectorize &&
7448-
TE->State != TreeEntry::StridedVectorize &&
7449-
TE->State != TreeEntry::CompressVectorize &&
7450-
TE->State != TreeEntry::SplitVectorize &&
7451-
"Only non-vectorized nodes are expected.");
7452-
if (TE->UserTreeIndex.UserTE == UserTE &&
7453-
TE->UserTreeIndex.EdgeIdx == I) {
7454-
assert(TE->isSame(UserTE->getOperand(I)) &&
7455-
"Operand entry does not match operands.");
7456-
Gather = TE;
7457-
return true;
7458-
}
7459-
return false;
7460-
}) > 1 &&
7461-
!allConstant(UserTE->getOperand(I)))
7462-
return false;
7463-
if (Gather)
7464-
GatherOps.push_back(Gather);
7418+
if (ReorderableGathers.contains(TE))
7419+
GatherOps.push_back(TE);
74657420
}
7466-
return true;
74677421
}
74687422

74697423
void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
@@ -7479,13 +7433,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
74797433
// Find all reorderable leaf nodes with the given VF.
74807434
// Currently they are vectorized loads, extracts without alternate operands +
74817435
// some gathering of extracts.
7482-
SmallVector<TreeEntry *> NonVectorized;
7436+
SmallPtrSet<const TreeEntry *, 4> NonVectorized;
74837437
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
74847438
if (TE->State != TreeEntry::Vectorize &&
74857439
TE->State != TreeEntry::StridedVectorize &&
74867440
TE->State != TreeEntry::CompressVectorize &&
74877441
TE->State != TreeEntry::SplitVectorize)
7488-
NonVectorized.push_back(TE.get());
7442+
NonVectorized.insert(TE.get());
74897443
if (std::optional<OrdersType> CurrentOrder =
74907444
getReorderingData(*TE, /*TopToBottom=*/false, IgnoreReorder)) {
74917445
Queue.push(TE.get());
@@ -7584,11 +7538,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
75847538
}
75857539
// Check that operands are used only in the User node.
75867540
SmallVector<TreeEntry *> GatherOps;
7587-
if (!canReorderOperands(Data.first, Data.second, NonVectorized,
7588-
GatherOps)) {
7589-
Visited.insert_range(llvm::make_second_range(Data.second));
7590-
continue;
7591-
}
7541+
buildReorderableOperands(Data.first, Data.second, NonVectorized,
7542+
GatherOps);
75927543
// All operands are reordered and used only in this node - propagate the
75937544
// most used order to the user node.
75947545
MapVector<OrdersType, unsigned,
@@ -12916,33 +12867,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1291612867

1291712868
const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
1291812869
unsigned Idx) const {
12919-
ArrayRef<Value *> VL = E->getOperand(Idx);
12920-
InstructionsState S = getSameOpcode(VL, *TLI);
12921-
// Special processing for GEPs bundle, which may include non-gep values.
12922-
if (!S && VL.front()->getType()->isPointerTy()) {
12923-
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
12924-
if (It != VL.end())
12925-
S = getSameOpcode(*It, *TLI);
12926-
}
12927-
if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx, VL, S))
12928-
return VE;
12929-
if (S || !isConstant(VL.front())) {
12930-
for (const TreeEntry *VE :
12931-
ValueToGatherNodes.lookup(S ? S.getMainOp() : VL.front()))
12932-
if (VE->UserTreeIndex.EdgeIdx == Idx && VE->UserTreeIndex.UserTE == E) {
12933-
assert(VE->isSame(VL) && "Expected gather node with same values.");
12934-
return VE;
12935-
}
12936-
}
12937-
const auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
12938-
[&](const std::unique_ptr<TreeEntry> &TE) {
12939-
return (TE->isGather() ||
12940-
TE->State == TreeEntry::SplitVectorize) &&
12941-
TE->UserTreeIndex.EdgeIdx == Idx &&
12942-
TE->UserTreeIndex.UserTE == E;
12943-
});
12944-
assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
12945-
return It->get();
12870+
TreeEntry *Op = OperandsToTreeEntry.at({E, Idx});
12871+
assert(Op->isSame(E->getOperand(Idx)) && "Operands mismatch!");
12872+
return Op;
1294612873
}
1294712874

1294812875
TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
@@ -16914,121 +16841,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1691416841
}
1691516842
};
1691616843

16917-
BoUpSLP::TreeEntry *
16918-
BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
16919-
ArrayRef<Value *> VL,
16920-
const InstructionsState &S) {
16921-
if (!S)
16922-
return nullptr;
16923-
for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
16924-
if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
16925-
TE->isSame(VL))
16926-
return TE;
16927-
return nullptr;
16928-
}
16929-
1693016844
Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
16931-
ValueList &VL = E->getOperand(NodeIdx);
16932-
InstructionsState S = getSameOpcode(VL, *TLI);
16933-
// Special processing for GEPs bundle, which may include non-gep values.
16934-
if (!S && VL.front()->getType()->isPointerTy()) {
16935-
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
16936-
if (It != VL.end())
16937-
S = getSameOpcode(*It, *TLI);
16938-
}
16939-
const unsigned VF = VL.size();
16940-
if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx, VL, S)) {
16941-
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
16942-
// V may be affected by MinBWs.
16943-
// We want ShuffleInstructionBuilder to correctly support REVEC. The key
16944-
// factor is the number of elements, not their type.
16945-
Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
16946-
unsigned NumElements = getNumElements(VL.front()->getType());
16947-
ShuffleInstructionBuilder ShuffleBuilder(
16948-
NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
16949-
: ScalarTy,
16950-
Builder, *this);
16951-
ShuffleBuilder.add(V, Mask);
16952-
SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
16953-
E->CombinedEntriesWithIndices.size());
16954-
transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
16955-
[&](const auto &P) {
16956-
return std::make_pair(VectorizableTree[P.first].get(),
16957-
P.second);
16958-
});
16959-
assert((E->CombinedEntriesWithIndices.empty() ||
16960-
E->ReorderIndices.empty()) &&
16961-
"Expected either combined subnodes or reordering");
16962-
return ShuffleBuilder.finalize({}, SubVectors, {});
16963-
};
16964-
Value *V = vectorizeTree(VE);
16965-
if (VF * getNumElements(VL[0]->getType()) !=
16966-
cast<FixedVectorType>(V->getType())->getNumElements()) {
16967-
if (!VE->ReuseShuffleIndices.empty()) {
16968-
// Reshuffle to get only unique values.
16969-
// If some of the scalars are duplicated in the vectorization
16970-
// tree entry, we do not vectorize them but instead generate a
16971-
// mask for the reuses. But if there are several users of the
16972-
// same entry, they may have different vectorization factors.
16973-
// This is especially important for PHI nodes. In this case, we
16974-
// need to adapt the resulting instruction for the user
16975-
// vectorization factor and have to reshuffle it again to take
16976-
// only unique elements of the vector. Without this code the
16977-
// function incorrectly returns reduced vector instruction with
16978-
// the same elements, not with the unique ones.
16979-
16980-
// block:
16981-
// %phi = phi <2 x > { .., %entry} {%shuffle, %block}
16982-
// %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
16983-
// ... (use %2)
16984-
// %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
16985-
// br %block
16986-
SmallVector<int> Mask(VF, PoisonMaskElem);
16987-
for (auto [I, V] : enumerate(VL)) {
16988-
if (isa<PoisonValue>(V))
16989-
continue;
16990-
Mask[I] = VE->findLaneForValue(V);
16991-
}
16992-
V = FinalShuffle(V, Mask);
16993-
} else {
16994-
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
16995-
"Expected vectorization factor less "
16996-
"than original vector size.");
16997-
SmallVector<int> UniformMask(VF, 0);
16998-
std::iota(UniformMask.begin(), UniformMask.end(), 0);
16999-
V = FinalShuffle(V, UniformMask);
17000-
}
17001-
}
17002-
// Need to update the operand gather node, if actually the operand is not a
17003-
// vectorized node, but the buildvector/gather node, which matches one of
17004-
// the vectorized nodes.
17005-
if (VE->UserTreeIndex.UserTE != E || VE->UserTreeIndex.EdgeIdx != NodeIdx) {
17006-
auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17007-
[&](const std::unique_ptr<TreeEntry> &TE) {
17008-
return TE->isGather() &&
17009-
TE->UserTreeIndex.UserTE == E &&
17010-
TE->UserTreeIndex.EdgeIdx == NodeIdx;
17011-
});
17012-
assert(It != VectorizableTree.end() && "Expected gather node operand.");
17013-
(*It)->VectorizedValue = V;
17014-
}
17015-
return V;
17016-
}
17017-
17018-
// Find the corresponding gather entry and vectorize it.
17019-
// Allows to be more accurate with tree/graph transformations, checks for the
17020-
// correctness of the transformations in many cases.
17021-
auto *I = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17022-
[E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
17023-
return TE->isOperandGatherNode({E, NodeIdx}) ||
17024-
(TE->State == TreeEntry::SplitVectorize &&
17025-
TE->UserTreeIndex == EdgeInfo(E, NodeIdx));
17026-
});
17027-
assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
17028-
assert(I->get()->UserTreeIndex &&
17029-
"Expected only single user for the gather node.");
17030-
assert(I->get()->isSame(VL) && "Expected same list of scalars.");
17031-
return vectorizeTree(I->get());
16845+
return vectorizeTree(getOperandEntry(E, NodeIdx));
1703216846
}
1703316847

1703416848
template <typename BVTy, typename ResTy, typename... Args>

0 commit comments

Comments
 (0)