@@ -2283,6 +2283,14 @@ class BoUpSLP {
2283
2283
~BoUpSLP();
2284
2284
2285
2285
private:
2286
+ /// Determine if a vectorized value \p V can be demoted to
2287
+ /// a smaller type with a truncation. We collect the values that will be
2288
+ /// demoted in ToDemote and additional roots that require investigating in
2289
+ /// Roots.
2290
+ bool collectValuesToDemote(Value *V, SmallVectorImpl<Value *> &ToDemote,
2291
+ SmallVectorImpl<Value *> &Roots,
2292
+ DenseSet<Value *> &Visited) const;
2293
+
2286
2294
/// Check if the operands on the edges \p Edges of the \p UserTE allow
2287
2295
/// reordering (i.e. the operands can be reordered because they have only one
2288
2296
/// user and reorderable).
@@ -9044,8 +9052,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
9044
9052
// for the extract and the added cost of the sign extend if needed.
9045
9053
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
9046
9054
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
9047
- auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
9048
- auto It = MinBWs.find(ScalarRoot);
9055
+ auto It = MinBWs.find(EU.Scalar);
9049
9056
if (It != MinBWs.end()) {
9050
9057
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
9051
9058
unsigned Extend =
@@ -13074,19 +13081,20 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
13074
13081
// Determine if a value V in a vectorizable expression Expr can be demoted to a
13075
13082
// smaller type with a truncation. We collect the values that will be demoted
13076
13083
// in ToDemote and additional roots that require investigating in Roots.
13077
- static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
13078
- SmallVectorImpl<Value *> &ToDemote,
13079
- SmallVectorImpl<Value *> &Roots) {
13084
+ bool BoUpSLP::collectValuesToDemote(Value *V,
13085
+ SmallVectorImpl<Value *> &ToDemote,
13086
+ SmallVectorImpl<Value *> &Roots,
13087
+ DenseSet<Value *> &Visited) const {
13080
13088
// We can always demote constants.
13081
13089
if (isa<Constant>(V)) {
13082
13090
ToDemote.push_back(V);
13083
13091
return true;
13084
13092
}
13085
13093
13086
- // If the value is not an instruction in the expression with only one use, it
13087
- // cannot be demoted.
13094
+ // If the value is not a vectorized instruction in the expression with only
13095
+ // one use, it cannot be demoted.
13088
13096
auto *I = dyn_cast<Instruction>(V);
13089
- if (!I || !I->hasOneUse() || !Expr.count (I))
13097
+ if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert (I).second )
13090
13098
return false;
13091
13099
13092
13100
switch (I->getOpcode()) {
@@ -13110,16 +13118,16 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
13110
13118
case Instruction::And:
13111
13119
case Instruction::Or:
13112
13120
case Instruction::Xor:
13113
- if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
13114
- !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
13121
+ if (!collectValuesToDemote(I->getOperand(0), ToDemote, Roots, Visited ) ||
13122
+ !collectValuesToDemote(I->getOperand(1), ToDemote, Roots, Visited ))
13115
13123
return false;
13116
13124
break;
13117
13125
13118
13126
// We can demote selects if we can demote their true and false values.
13119
13127
case Instruction::Select: {
13120
13128
SelectInst *SI = cast<SelectInst>(I);
13121
- if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
13122
- !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
13129
+ if (!collectValuesToDemote(SI->getTrueValue(), ToDemote, Roots, Visited ) ||
13130
+ !collectValuesToDemote(SI->getFalseValue(), ToDemote, Roots, Visited ))
13123
13131
return false;
13124
13132
break;
13125
13133
}
@@ -13129,7 +13137,7 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
13129
13137
case Instruction::PHI: {
13130
13138
PHINode *PN = cast<PHINode>(I);
13131
13139
for (Value *IncValue : PN->incoming_values())
13132
- if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
13140
+ if (!collectValuesToDemote(IncValue, ToDemote, Roots, Visited ))
13133
13141
return false;
13134
13142
break;
13135
13143
}
@@ -13156,36 +13164,20 @@ void BoUpSLP::computeMinimumValueSizes() {
13156
13164
if (!TreeRootIT)
13157
13165
return;
13158
13166
13159
- // If the expression is not rooted by a store, these roots should have
13160
- // external uses.
13161
- // TOSO: investigate if this can be relaxed.
13162
- SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
13163
- for (auto &EU : ExternalUses)
13164
- if (!Expr.erase(EU.Scalar))
13165
- return;
13166
- if (!Expr.empty())
13167
+ // Ensure the roots of the vectorizable tree don't form a cycle.
13168
+ if (!VectorizableTree.front()->UserTreeIndices.empty())
13167
13169
return;
13168
13170
13169
- // Collect the scalar values of the vectorizable expression. We will use this
13170
- // context to determine which values can be demoted. If we see a truncation,
13171
- // we mark it as seeding another demotion.
13172
- for (auto &EntryPtr : VectorizableTree)
13173
- Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
13174
-
13175
- // Ensure the roots of the vectorizable tree don't form a cycle. They must
13176
- // have a single external user that is not in the vectorizable tree.
13177
- for (auto *Root : TreeRoot)
13178
- if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
13179
- return;
13180
-
13181
13171
// Conservatively determine if we can actually truncate the roots of the
13182
13172
// expression. Collect the values that can be demoted in ToDemote and
13183
13173
// additional roots that require investigating in Roots.
13184
13174
SmallVector<Value *, 32> ToDemote;
13185
13175
SmallVector<Value *, 4> Roots;
13186
- for (auto *Root : TreeRoot)
13187
- if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
13176
+ for (auto *Root : TreeRoot) {
13177
+ DenseSet<Value *> Visited;
13178
+ if (!collectValuesToDemote(Root, ToDemote, Roots, Visited))
13188
13179
return;
13180
+ }
13189
13181
13190
13182
// The maximum bit width required to represent all the values that can be
13191
13183
// demoted without loss of precision. It would be safe to truncate the roots
@@ -13215,9 +13207,9 @@ void BoUpSLP::computeMinimumValueSizes() {
13215
13207
// maximum bit width required to store the scalar by using ValueTracking to
13216
13208
// compute the number of high-order bits we can truncate.
13217
13209
if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
13218
- llvm:: all_of(TreeRoot, [](Value *R ) {
13219
- assert(R->hasOneUse() && "Root should have only one use!");
13220
- return isa<GetElementPtrInst>(R->user_back() );
13210
+ all_of(TreeRoot, [](Value *V ) {
13211
+ return all_of(V->users(),
13212
+ [](User *U) { return isa<GetElementPtrInst>(U); } );
13221
13213
})) {
13222
13214
MaxBitWidth = 8u;
13223
13215
@@ -13266,8 +13258,10 @@ void BoUpSLP::computeMinimumValueSizes() {
13266
13258
// If we can truncate the root, we must collect additional values that might
13267
13259
// be demoted as a result. That is, those seeded by truncations we will
13268
13260
// modify.
13269
- while (!Roots.empty())
13270
- collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
13261
+ while (!Roots.empty()) {
13262
+ DenseSet<Value *> Visited;
13263
+ collectValuesToDemote(Roots.pop_back_val(), ToDemote, Roots, Visited);
13264
+ }
13271
13265
13272
13266
// Finally, map the values we can demote to the maximum bit width we computed.
13273
13267
DenseMap<const TreeEntry *, bool> Signendness;
0 commit comments