@@ -1886,6 +1886,7 @@ class BoUpSLP {
1886
1886
void deleteTree() {
1887
1887
VectorizableTree.clear();
1888
1888
ScalarToTreeEntries.clear();
1889
+ OperandsToTreeEntry.clear();
1889
1890
ScalarsInSplitNodes.clear();
1890
1891
MustGather.clear();
1891
1892
NonScheduledFirst.clear();
@@ -3401,54 +3402,23 @@ class BoUpSLP {
3401
3402
const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
3402
3403
bool &IsProfitableToDemote, bool IsTruncRoot) const;
3403
3404
3404
- /// Check if the operands on the edges \p Edges of the \p UserTE allows
3405
- /// reordering (i.e. the operands can be reordered because they have only one
3406
- /// user and reordarable).
3405
+ /// Builds the list of reorderable operands on the edges \p Edges of the \p
3406
+ /// UserTE, which allow reordering (i.e. the operands can be reordered because
3407
+ /// they have only one user and are reorderable).
3407
3408
/// \param ReorderableGathers List of all gather nodes that require reordering
3408
3409
/// (e.g., gather of extractelements or partially vectorizable loads).
3409
3410
/// \param GatherOps List of gather operand nodes for \p UserTE that require
3410
3411
/// reordering, subset of \p NonVectorized.
3411
- bool
3412
- canReorderOperands( TreeEntry *UserTE,
3413
- SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3414
- ArrayRef< TreeEntry *> ReorderableGathers,
3415
- SmallVectorImpl<TreeEntry *> &GatherOps);
3412
+ void buildReorderableOperands(
3413
+ TreeEntry *UserTE,
3414
+ SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
3415
+ const SmallPtrSetImpl<const TreeEntry *> & ReorderableGathers,
3416
+ SmallVectorImpl<TreeEntry *> &GatherOps);
3416
3417
3417
3418
/// Checks if the given \p TE is a gather node with clustered reused scalars
3418
3419
/// and reorders it per given \p Mask.
3419
3420
void reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const;
3420
3421
3421
- /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3422
- /// if any. If it is not vectorized (gather node), returns nullptr.
3423
- TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
3424
- ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
3425
- TreeEntry *TE = nullptr;
3426
- const auto *It = find_if(VL, [&](Value *V) {
3427
- if (!isa<Instruction>(V))
3428
- return false;
3429
- for (TreeEntry *E : getTreeEntries(V)) {
3430
- if (E->UserTreeIndex == EdgeInfo(UserTE, OpIdx)) {
3431
- TE = E;
3432
- return true;
3433
- }
3434
- }
3435
- return false;
3436
- });
3437
- if (It != VL.end()) {
3438
- assert(TE->isSame(VL) && "Expected same scalars.");
3439
- return TE;
3440
- }
3441
- return nullptr;
3442
- }
3443
-
3444
- /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
3445
- /// if any. If it is not vectorized (gather node), returns nullptr.
3446
- const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
3447
- unsigned OpIdx) const {
3448
- return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
3449
- const_cast<TreeEntry *>(UserTE), OpIdx);
3450
- }
3451
-
3452
3422
/// Checks if all users of \p I are the part of the vectorization tree.
3453
3423
bool areAllUsersVectorized(
3454
3424
Instruction *I,
@@ -3509,19 +3479,6 @@ class BoUpSLP {
3509
3479
/// Vectorize a single entry in the tree.
3510
3480
Value *vectorizeTree(TreeEntry *E);
3511
3481
3512
- /// Returns vectorized operand node, that matches the order of the scalars
3513
- /// operand number \p NodeIdx in entry \p E.
3514
- TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3515
- ArrayRef<Value *> VL,
3516
- const InstructionsState &S);
3517
- const TreeEntry *
3518
- getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
3519
- ArrayRef<Value *> VL,
3520
- const InstructionsState &S) const {
3521
- return const_cast<BoUpSLP *>(this)->getMatchedVectorizedOperand(E, NodeIdx,
3522
- VL, S);
3523
- }
3524
-
3525
3482
/// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
3526
3483
/// \p E.
3527
3484
Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
@@ -3715,11 +3672,6 @@ class BoUpSLP {
3715
3672
return IsSame(Scalars, ReuseShuffleIndices);
3716
3673
}
3717
3674
3718
- bool isOperandGatherNode(const EdgeInfo &UserEI) const {
3719
- return isGather() && UserTreeIndex.EdgeIdx == UserEI.EdgeIdx &&
3720
- UserTreeIndex.UserTE == UserEI.UserTE;
3721
- }
3722
-
3723
3675
/// \returns true if current entry has same operands as \p TE.
3724
3676
bool hasEqualOperands(const TreeEntry &TE) const {
3725
3677
if (TE.getNumOperands() != getNumOperands())
@@ -4107,6 +4059,9 @@ class BoUpSLP {
4107
4059
TreeEntry *Last = VectorizableTree.back().get();
4108
4060
Last->Idx = VectorizableTree.size() - 1;
4109
4061
Last->State = EntryState;
4062
+ if (UserTreeIdx.UserTE)
4063
+ OperandsToTreeEntry.try_emplace(
4064
+ std::make_pair(UserTreeIdx.UserTE, UserTreeIdx.EdgeIdx), Last);
4110
4065
// FIXME: Remove once support for ReuseShuffleIndices has been implemented
4111
4066
// for non-power-of-two vectors.
4112
4067
assert(
@@ -4298,6 +4253,10 @@ class BoUpSLP {
4298
4253
/// Maps a specific scalar to its tree entry(ies).
4299
4254
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
4300
4255
4256
+ /// Maps a (user tree entry, operand index) pair to that operand's tree entry.
4257
+ SmallDenseMap<std::pair<const TreeEntry *, unsigned>, TreeEntry *>
4258
+ OperandsToTreeEntry;
4259
+
4301
4260
/// Scalars, used in split vectorize nodes.
4302
4261
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarsInSplitNodes;
4303
4262
@@ -7411,11 +7370,11 @@ void BoUpSLP::reorderTopToBottom() {
7411
7370
}
7412
7371
}
7413
7372
7414
- bool BoUpSLP::canReorderOperands (
7373
+ void BoUpSLP::buildReorderableOperands (
7415
7374
TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
7416
- ArrayRef< TreeEntry *> ReorderableGathers,
7375
+ const SmallPtrSetImpl<const TreeEntry *> & ReorderableGathers,
7417
7376
SmallVectorImpl<TreeEntry *> &GatherOps) {
7418
- for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I ) {
7377
+ for (unsigned I : seq<unsigned>( UserTE->getNumOperands()) ) {
7419
7378
if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
7420
7379
return OpData.first == I &&
7421
7380
(OpData.second->State == TreeEntry::Vectorize ||
@@ -7424,7 +7383,25 @@ bool BoUpSLP::canReorderOperands(
7424
7383
OpData.second->State == TreeEntry::SplitVectorize);
7425
7384
}))
7426
7385
continue;
7427
- if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
7386
+ // Do not request operands if they do not exist.
7387
+ if (UserTE->hasState()) {
7388
+ if (UserTE->getOpcode() == Instruction::ExtractElement ||
7389
+ UserTE->getOpcode() == Instruction::ExtractValue)
7390
+ continue;
7391
+ if (UserTE->getOpcode() == Instruction::InsertElement && I == 0)
7392
+ continue;
7393
+ if (UserTE->getOpcode() == Instruction::Store &&
7394
+ UserTE->State == TreeEntry::Vectorize && I == 1)
7395
+ continue;
7396
+ if (UserTE->getOpcode() == Instruction::Load &&
7397
+ (UserTE->State == TreeEntry::Vectorize ||
7398
+ UserTE->State == TreeEntry::StridedVectorize ||
7399
+ UserTE->State == TreeEntry::CompressVectorize))
7400
+ continue;
7401
+ }
7402
+ TreeEntry *TE = getOperandEntry(UserTE, I);
7403
+ assert(TE && "Expected operand entry.");
7404
+ if (!TE->isGather()) {
7428
7405
// Add the node to the list of the ordered nodes with the identity
7429
7406
// order.
7430
7407
Edges.emplace_back(I, TE);
@@ -7433,37 +7410,14 @@ bool BoUpSLP::canReorderOperands(
7433
7410
// simply add to the list of gathered ops.
7434
7411
// If there are reused scalars, process this node as a regular vectorize
7435
7412
// node, just reorder reuses mask.
7436
- if (TE->State != TreeEntry::Vectorize &&
7437
- TE->State != TreeEntry::StridedVectorize &&
7438
- TE->State != TreeEntry::CompressVectorize &&
7439
- TE->State != TreeEntry::SplitVectorize &&
7413
+ if (TE->State == TreeEntry::ScatterVectorize &&
7440
7414
TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
7441
7415
GatherOps.push_back(TE);
7442
7416
continue;
7443
7417
}
7444
- TreeEntry *Gather = nullptr;
7445
- if (count_if(ReorderableGathers,
7446
- [&Gather, UserTE, I](TreeEntry *TE) {
7447
- assert(TE->State != TreeEntry::Vectorize &&
7448
- TE->State != TreeEntry::StridedVectorize &&
7449
- TE->State != TreeEntry::CompressVectorize &&
7450
- TE->State != TreeEntry::SplitVectorize &&
7451
- "Only non-vectorized nodes are expected.");
7452
- if (TE->UserTreeIndex.UserTE == UserTE &&
7453
- TE->UserTreeIndex.EdgeIdx == I) {
7454
- assert(TE->isSame(UserTE->getOperand(I)) &&
7455
- "Operand entry does not match operands.");
7456
- Gather = TE;
7457
- return true;
7458
- }
7459
- return false;
7460
- }) > 1 &&
7461
- !allConstant(UserTE->getOperand(I)))
7462
- return false;
7463
- if (Gather)
7464
- GatherOps.push_back(Gather);
7418
+ if (ReorderableGathers.contains(TE))
7419
+ GatherOps.push_back(TE);
7465
7420
}
7466
- return true;
7467
7421
}
7468
7422
7469
7423
void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
@@ -7479,13 +7433,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
7479
7433
// Find all reorderable leaf nodes with the given VF.
7480
7434
// Currently they are vectorized loads, extracts without alternate operands +
7481
7435
// some gathering of extracts.
7482
- SmallVector< TreeEntry *> NonVectorized;
7436
+ SmallPtrSet<const TreeEntry *, 4 > NonVectorized;
7483
7437
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
7484
7438
if (TE->State != TreeEntry::Vectorize &&
7485
7439
TE->State != TreeEntry::StridedVectorize &&
7486
7440
TE->State != TreeEntry::CompressVectorize &&
7487
7441
TE->State != TreeEntry::SplitVectorize)
7488
- NonVectorized.push_back (TE.get());
7442
+ NonVectorized.insert (TE.get());
7489
7443
if (std::optional<OrdersType> CurrentOrder =
7490
7444
getReorderingData(*TE, /*TopToBottom=*/false, IgnoreReorder)) {
7491
7445
Queue.push(TE.get());
@@ -7584,11 +7538,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
7584
7538
}
7585
7539
// Collect the reorderable operands of the User node.
7586
7540
SmallVector<TreeEntry *> GatherOps;
7587
- if (!canReorderOperands(Data.first, Data.second, NonVectorized,
7588
- GatherOps)) {
7589
- Visited.insert_range(llvm::make_second_range(Data.second));
7590
- continue;
7591
- }
7541
+ buildReorderableOperands(Data.first, Data.second, NonVectorized,
7542
+ GatherOps);
7592
7543
// All operands are reordered and used only in this node - propagate the
7593
7544
// most used order to the user node.
7594
7545
MapVector<OrdersType, unsigned,
@@ -12916,33 +12867,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
12916
12867
12917
12868
const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
12918
12869
unsigned Idx) const {
12919
- ArrayRef<Value *> VL = E->getOperand(Idx);
12920
- InstructionsState S = getSameOpcode(VL, *TLI);
12921
- // Special processing for GEPs bundle, which may include non-gep values.
12922
- if (!S && VL.front()->getType()->isPointerTy()) {
12923
- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
12924
- if (It != VL.end())
12925
- S = getSameOpcode(*It, *TLI);
12926
- }
12927
- if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx, VL, S))
12928
- return VE;
12929
- if (S || !isConstant(VL.front())) {
12930
- for (const TreeEntry *VE :
12931
- ValueToGatherNodes.lookup(S ? S.getMainOp() : VL.front()))
12932
- if (VE->UserTreeIndex.EdgeIdx == Idx && VE->UserTreeIndex.UserTE == E) {
12933
- assert(VE->isSame(VL) && "Expected gather node with same values.");
12934
- return VE;
12935
- }
12936
- }
12937
- const auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
12938
- [&](const std::unique_ptr<TreeEntry> &TE) {
12939
- return (TE->isGather() ||
12940
- TE->State == TreeEntry::SplitVectorize) &&
12941
- TE->UserTreeIndex.EdgeIdx == Idx &&
12942
- TE->UserTreeIndex.UserTE == E;
12943
- });
12944
- assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
12945
- return It->get();
12870
+ TreeEntry *Op = OperandsToTreeEntry.at({E, Idx});
12871
+ assert(Op->isSame(E->getOperand(Idx)) && "Operands mismatch!");
12872
+ return Op;
12946
12873
}
12947
12874
12948
12875
TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
@@ -16914,121 +16841,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
16914
16841
}
16915
16842
};
16916
16843
16917
- BoUpSLP::TreeEntry *
16918
- BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
16919
- ArrayRef<Value *> VL,
16920
- const InstructionsState &S) {
16921
- if (!S)
16922
- return nullptr;
16923
- for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
16924
- if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
16925
- TE->isSame(VL))
16926
- return TE;
16927
- return nullptr;
16928
- }
16929
-
16930
16844
Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
16931
- ValueList &VL = E->getOperand(NodeIdx);
16932
- InstructionsState S = getSameOpcode(VL, *TLI);
16933
- // Special processing for GEPs bundle, which may include non-gep values.
16934
- if (!S && VL.front()->getType()->isPointerTy()) {
16935
- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
16936
- if (It != VL.end())
16937
- S = getSameOpcode(*It, *TLI);
16938
- }
16939
- const unsigned VF = VL.size();
16940
- if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx, VL, S)) {
16941
- auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
16942
- // V may be affected by MinBWs.
16943
- // We want ShuffleInstructionBuilder to correctly support REVEC. The key
16944
- // factor is the number of elements, not their type.
16945
- Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
16946
- unsigned NumElements = getNumElements(VL.front()->getType());
16947
- ShuffleInstructionBuilder ShuffleBuilder(
16948
- NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
16949
- : ScalarTy,
16950
- Builder, *this);
16951
- ShuffleBuilder.add(V, Mask);
16952
- SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
16953
- E->CombinedEntriesWithIndices.size());
16954
- transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
16955
- [&](const auto &P) {
16956
- return std::make_pair(VectorizableTree[P.first].get(),
16957
- P.second);
16958
- });
16959
- assert((E->CombinedEntriesWithIndices.empty() ||
16960
- E->ReorderIndices.empty()) &&
16961
- "Expected either combined subnodes or reordering");
16962
- return ShuffleBuilder.finalize({}, SubVectors, {});
16963
- };
16964
- Value *V = vectorizeTree(VE);
16965
- if (VF * getNumElements(VL[0]->getType()) !=
16966
- cast<FixedVectorType>(V->getType())->getNumElements()) {
16967
- if (!VE->ReuseShuffleIndices.empty()) {
16968
- // Reshuffle to get only unique values.
16969
- // If some of the scalars are duplicated in the vectorization
16970
- // tree entry, we do not vectorize them but instead generate a
16971
- // mask for the reuses. But if there are several users of the
16972
- // same entry, they may have different vectorization factors.
16973
- // This is especially important for PHI nodes. In this case, we
16974
- // need to adapt the resulting instruction for the user
16975
- // vectorization factor and have to reshuffle it again to take
16976
- // only unique elements of the vector. Without this code the
16977
- // function incorrectly returns reduced vector instruction with
16978
- // the same elements, not with the unique ones.
16979
-
16980
- // block:
16981
- // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
16982
- // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
16983
- // ... (use %2)
16984
- // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
16985
- // br %block
16986
- SmallVector<int> Mask(VF, PoisonMaskElem);
16987
- for (auto [I, V] : enumerate(VL)) {
16988
- if (isa<PoisonValue>(V))
16989
- continue;
16990
- Mask[I] = VE->findLaneForValue(V);
16991
- }
16992
- V = FinalShuffle(V, Mask);
16993
- } else {
16994
- assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
16995
- "Expected vectorization factor less "
16996
- "than original vector size.");
16997
- SmallVector<int> UniformMask(VF, 0);
16998
- std::iota(UniformMask.begin(), UniformMask.end(), 0);
16999
- V = FinalShuffle(V, UniformMask);
17000
- }
17001
- }
17002
- // Need to update the operand gather node, if actually the operand is not a
17003
- // vectorized node, but the buildvector/gather node, which matches one of
17004
- // the vectorized nodes.
17005
- if (VE->UserTreeIndex.UserTE != E || VE->UserTreeIndex.EdgeIdx != NodeIdx) {
17006
- auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17007
- [&](const std::unique_ptr<TreeEntry> &TE) {
17008
- return TE->isGather() &&
17009
- TE->UserTreeIndex.UserTE == E &&
17010
- TE->UserTreeIndex.EdgeIdx == NodeIdx;
17011
- });
17012
- assert(It != VectorizableTree.end() && "Expected gather node operand.");
17013
- (*It)->VectorizedValue = V;
17014
- }
17015
- return V;
17016
- }
17017
-
17018
- // Find the corresponding gather entry and vectorize it.
17019
- // Allows to be more accurate with tree/graph transformations, checks for the
17020
- // correctness of the transformations in many cases.
17021
- auto *I = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
17022
- [E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
17023
- return TE->isOperandGatherNode({E, NodeIdx}) ||
17024
- (TE->State == TreeEntry::SplitVectorize &&
17025
- TE->UserTreeIndex == EdgeInfo(E, NodeIdx));
17026
- });
17027
- assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
17028
- assert(I->get()->UserTreeIndex &&
17029
- "Expected only single user for the gather node.");
17030
- assert(I->get()->isSame(VL) && "Expected same list of scalars.");
17031
- return vectorizeTree(I->get());
16845
+ return vectorizeTree(getOperandEntry(E, NodeIdx));
17032
16846
}
17033
16847
17034
16848
template <typename BVTy, typename ResTy, typename... Args>
0 commit comments