@@ -2285,6 +2285,14 @@ class BoUpSLP {
2285
2285
~BoUpSLP();
2286
2286
2287
2287
private:
2288
+ /// Determine if a vectorized value \p V can be demoted to
2289
+ /// a smaller type with a truncation. We collect the values that will be
2290
+ /// demoted in ToDemote and additional roots that require investigating in
2291
+ /// Roots.
2292
+ bool collectValuesToDemote(Value *V, SmallVectorImpl<Value *> &ToDemote,
2293
+ SmallVectorImpl<Value *> &Roots,
2294
+ DenseSet<Value *> &Visited) const;
2295
+
2288
2296
/// Check if the operands on the edges \p Edges of the \p UserTE allow
2289
2297
/// reordering (i.e. the operands can be reordered because they have only one
2290
2298
/// user and reorderable).
@@ -9024,8 +9032,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
9024
9032
// for the extract and the added cost of the sign extend if needed.
9025
9033
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
9026
9034
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
9027
- auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
9028
- auto It = MinBWs.find(ScalarRoot);
9035
+ auto It = MinBWs.find(EU.Scalar);
9029
9036
if (It != MinBWs.end()) {
9030
9037
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
9031
9038
unsigned Extend =
@@ -13059,19 +13066,20 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
13059
13066
// Determine if a value V in a vectorizable expression can be demoted to a
13060
13067
// smaller type with a truncation. We collect the values that will be demoted
13061
13068
// in ToDemote and additional roots that require investigating in Roots.
13062
- static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
13063
- SmallVectorImpl<Value *> &ToDemote,
13064
- SmallVectorImpl<Value *> &Roots) {
13069
+ bool BoUpSLP::collectValuesToDemote(Value *V,
13070
+ SmallVectorImpl<Value *> &ToDemote,
13071
+ SmallVectorImpl<Value *> &Roots,
13072
+ DenseSet<Value *> &Visited) const {
13065
13073
// We can always demote constants.
13066
13074
if (isa<Constant>(V)) {
13067
13075
ToDemote.push_back(V);
13068
13076
return true;
13069
13077
}
13070
13078
13071
- // If the value is not an instruction in the expression with only one use, it
13072
- // cannot be demoted.
13079
+ // If the value is not a vectorized instruction in the expression with only
13080
+ // one use, it cannot be demoted.
13073
13081
auto *I = dyn_cast<Instruction>(V);
13074
- if (!I || !I->hasOneUse() || !Expr.count (I))
13082
+ if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert (I).second )
13075
13083
return false;
13076
13084
13077
13085
switch (I->getOpcode()) {
@@ -13095,16 +13103,16 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
13095
13103
case Instruction::And:
13096
13104
case Instruction::Or:
13097
13105
case Instruction::Xor:
13098
- if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
13099
- !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
13106
+ if (!collectValuesToDemote(I->getOperand(0), ToDemote, Roots, Visited ) ||
13107
+ !collectValuesToDemote(I->getOperand(1), ToDemote, Roots, Visited ))
13100
13108
return false;
13101
13109
break;
13102
13110
13103
13111
// We can demote selects if we can demote their true and false values.
13104
13112
case Instruction::Select: {
13105
13113
SelectInst *SI = cast<SelectInst>(I);
13106
- if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
13107
- !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
13114
+ if (!collectValuesToDemote(SI->getTrueValue(), ToDemote, Roots, Visited ) ||
13115
+ !collectValuesToDemote(SI->getFalseValue(), ToDemote, Roots, Visited ))
13108
13116
return false;
13109
13117
break;
13110
13118
}
@@ -13114,7 +13122,7 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
13114
13122
case Instruction::PHI: {
13115
13123
PHINode *PN = cast<PHINode>(I);
13116
13124
for (Value *IncValue : PN->incoming_values())
13117
- if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
13125
+ if (!collectValuesToDemote(IncValue, ToDemote, Roots, Visited ))
13118
13126
return false;
13119
13127
break;
13120
13128
}
@@ -13141,36 +13149,16 @@ void BoUpSLP::computeMinimumValueSizes() {
13141
13149
if (!TreeRootIT)
13142
13150
return;
13143
13151
13144
- // If the expression is not rooted by a store, these roots should have
13145
- // external uses.
13146
- // TOSO: investigate if this can be relaxed.
13147
- SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
13148
- for (auto &EU : ExternalUses)
13149
- if (!Expr.erase(EU.Scalar))
13150
- return;
13151
- if (!Expr.empty())
13152
- return;
13153
-
13154
- // Collect the scalar values of the vectorizable expression. We will use this
13155
- // context to determine which values can be demoted. If we see a truncation,
13156
- // we mark it as seeding another demotion.
13157
- for (auto &EntryPtr : VectorizableTree)
13158
- Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
13159
-
13160
- // Ensure the roots of the vectorizable tree don't form a cycle. They must
13161
- // have a single external user that is not in the vectorizable tree.
13162
- for (auto *Root : TreeRoot)
13163
- if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
13164
- return;
13165
-
13166
13152
// Conservatively determine if we can actually truncate the roots of the
13167
13153
// expression. Collect the values that can be demoted in ToDemote and
13168
13154
// additional roots that require investigating in Roots.
13169
13155
SmallVector<Value *, 32> ToDemote;
13170
13156
SmallVector<Value *, 4> Roots;
13171
- for (auto *Root : TreeRoot)
13172
- if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
13157
+ for (auto *Root : TreeRoot) {
13158
+ DenseSet<Value *> Visited;
13159
+ if (!collectValuesToDemote(Root, ToDemote, Roots, Visited))
13173
13160
return;
13161
+ }
13174
13162
13175
13163
// The maximum bit width required to represent all the values that can be
13176
13164
// demoted without loss of precision. It would be safe to truncate the roots
@@ -13200,9 +13188,9 @@ void BoUpSLP::computeMinimumValueSizes() {
13200
13188
// maximum bit width required to store the scalar by using ValueTracking to
13201
13189
// compute the number of high-order bits we can truncate.
13202
13190
if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
13203
- llvm:: all_of(TreeRoot, [](Value *R ) {
13204
- assert(R->hasOneUse() && "Root should have only one use!");
13205
- return isa<GetElementPtrInst>(R->user_back() );
13191
+ all_of(TreeRoot, [](Value *V ) {
13192
+ return all_of(V->users(),
13193
+ [](User *U) { return isa<GetElementPtrInst>(U); } );
13206
13194
})) {
13207
13195
MaxBitWidth = 8u;
13208
13196
@@ -13251,8 +13239,10 @@ void BoUpSLP::computeMinimumValueSizes() {
13251
13239
// If we can truncate the root, we must collect additional values that might
13252
13240
// be demoted as a result. That is, those seeded by truncations we will
13253
13241
// modify.
13254
- while (!Roots.empty())
13255
- collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
13242
+ while (!Roots.empty()) {
13243
+ DenseSet<Value *> Visited;
13244
+ collectValuesToDemote(Roots.pop_back_val(), ToDemote, Roots, Visited);
13245
+ }
13256
13246
13257
13247
// Finally, map the values we can demote to the maximum bit width we computed.
13258
13248
DenseMap<const TreeEntry *, bool> Signendness;
0 commit comments