@@ -2931,13 +2931,11 @@ class BoUpSLP {
2931
2931
/// truncation. We collect the entries that will be demoted in ToDemote.
2932
2932
/// \param E Node for analysis
2933
2933
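/// \param ToDemote indices of the nodes to be demoted.
/// \param NodesToKeepBWs indices of the nodes that were analyzed already and
/// must keep their original bitwidth; such nodes are not demoted.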
2934
- bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot,
2935
- unsigned &BitWidth,
2936
- SmallVectorImpl<unsigned> &ToDemote,
2937
- DenseSet<const TreeEntry *> &Visited,
2938
- unsigned &MaxDepthLevel,
2939
- bool &IsProfitableToDemote,
2940
- bool IsTruncRoot) const;
2934
+ bool collectValuesToDemote(
2935
+ const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
2936
+ SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
2937
+ const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
2938
+ bool &IsProfitableToDemote, bool IsTruncRoot) const;
2941
2939
2942
2940
/// Check if the operands on the edges \p Edges of the \p UserTE allows
2943
2941
/// reordering (i.e. the operands can be reordered because they have only one
@@ -17515,8 +17513,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
17515
17513
bool BoUpSLP::collectValuesToDemote(
17516
17514
const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
17517
17515
SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
17518
- unsigned &MaxDepthLevel, bool &IsProfitableToDemote ,
17519
- bool IsTruncRoot) const {
17516
+ const SmallDenseSet< unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel ,
17517
+ bool &IsProfitableToDemote, bool IsTruncRoot) const {
17520
17518
// We can always demote constants.
17521
17519
if (all_of(E.Scalars, IsaPred<Constant>))
17522
17520
return true;
@@ -17528,6 +17526,10 @@ bool BoUpSLP::collectValuesToDemote(
17528
17526
return true;
17529
17527
}
17530
17528
17529
+ // Check if the node was analyzed already and must keep its original bitwidth.
17530
+ if (NodesToKeepBWs.contains(E.Idx))
17531
+ return false;
17532
+
17531
17533
// If the value is not a vectorized instruction in the expression and not used
17532
17534
// by the insertelement instruction and not used in multiple vector nodes, it
17533
17535
// cannot be demoted.
@@ -17623,8 +17625,8 @@ bool BoUpSLP::collectValuesToDemote(
17623
17625
for (const TreeEntry *Op : Operands) {
17624
17626
unsigned Level = InitLevel;
17625
17627
if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth,
17626
- ToDemote, Visited, Level, IsProfitableToDemote ,
17627
- IsTruncRoot)) {
17628
+ ToDemote, Visited, NodesToKeepBWs, Level ,
17629
+ IsProfitableToDemote, IsTruncRoot)) {
17628
17630
if (!IsProfitableToDemote)
17629
17631
return false;
17630
17632
NeedToExit = true;
@@ -17926,6 +17928,7 @@ void BoUpSLP::computeMinimumValueSizes() {
17926
17928
bool IsTruncRoot = false;
17927
17929
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
17928
17930
SmallVector<unsigned> RootDemotes;
17931
+ SmallDenseSet<unsigned, 8> NodesToKeepBWs;
17929
17932
if (NodeIdx != 0 &&
17930
17933
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
17931
17934
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
@@ -17949,6 +17952,7 @@ void BoUpSLP::computeMinimumValueSizes() {
17949
17952
// Check if the root is trunc and the next node is gather/buildvector, then
17950
17953
// keep trunc in scalars, which is free in most cases.
17951
17954
if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
17955
+ !NodesToKeepBWs.contains(E.Idx) &&
17952
17956
E.Idx > (IsStoreOrInsertElt ? 2u : 1u) &&
17953
17957
all_of(E.Scalars, [&](Value *V) {
17954
17958
return V->hasOneUse() || isa<Constant>(V) ||
@@ -18071,8 +18075,8 @@ void BoUpSLP::computeMinimumValueSizes() {
18071
18075
bool NeedToDemote = IsProfitableToDemote;
18072
18076
18073
18077
if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth,
18074
- ToDemote, Visited, MaxDepthLevel, NeedToDemote ,
18075
- IsTruncRoot) ||
18078
+ ToDemote, Visited, NodesToKeepBWs, MaxDepthLevel ,
18079
+ NeedToDemote, IsTruncRoot) ||
18076
18080
(MaxDepthLevel <= Limit &&
18077
18081
!(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
18078
18082
(!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) ||
@@ -18206,14 +18210,15 @@ void BoUpSLP::computeMinimumValueSizes() {
18206
18210
});
18207
18211
}
18208
18212
18209
- // If the maximum bit width we compute is less than the with of the roots'
18213
+ // If the maximum bit width we compute is less than the width of the roots'
18210
18214
// type, we can proceed with the narrowing. Otherwise, do nothing.
18211
18215
if (MaxBitWidth == 0 ||
18212
18216
MaxBitWidth >=
18213
18217
cast<IntegerType>(TreeRoot.front()->getType()->getScalarType())
18214
18218
->getBitWidth()) {
18215
18219
if (UserIgnoreList)
18216
18220
AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
18221
+ NodesToKeepBWs.insert(ToDemote.begin(), ToDemote.end());
18217
18222
continue;
18218
18223
}
18219
18224
0 commit comments