@@ -10225,11 +10225,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
10225
10225
for (const TreeEntry *TE : ForRemoval)
10226
10226
Set.erase(TE);
10227
10227
}
10228
- bool NeedToRemapValues = false;
10229
10228
for (auto *It = UsedTEs.begin(); It != UsedTEs.end();) {
10230
10229
if (It->empty()) {
10231
10230
UsedTEs.erase(It);
10232
- NeedToRemapValues = true;
10233
10231
continue;
10234
10232
}
10235
10233
std::advance(It, 1);
@@ -10238,19 +10236,6 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
10238
10236
Entries.clear();
10239
10237
return std::nullopt;
10240
10238
}
10241
- // Recalculate the mapping between the values and entries sets.
10242
- if (NeedToRemapValues) {
10243
- DenseMap<Value *, int> PrevUsedValuesEntry;
10244
- PrevUsedValuesEntry.swap(UsedValuesEntry);
10245
- for (auto [Idx, Set] : enumerate(UsedTEs)) {
10246
- DenseSet<Value *> Values;
10247
- for (const TreeEntry *E : Set)
10248
- Values.insert(E->Scalars.begin(), E->Scalars.end());
10249
- for (const auto &P : PrevUsedValuesEntry)
10250
- if (Values.contains(P.first))
10251
- UsedValuesEntry.try_emplace(P.first, Idx);
10252
- }
10253
- }
10254
10239
}
10255
10240
10256
10241
unsigned VF = 0;
@@ -14016,33 +14001,6 @@ bool BoUpSLP::collectValuesToDemote(
14016
14001
};
14017
14002
unsigned Start = 0;
14018
14003
unsigned End = I->getNumOperands();
14019
-
14020
- auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14021
- if (!IsProfitableToDemote)
14022
- return false;
14023
- return (ITE && ITE->UserTreeIndices.size() > 1) ||
14024
- IsPotentiallyTruncated(I, BitWidth);
14025
- };
14026
- auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14027
- NeedToExit = false;
14028
- unsigned InitLevel = MaxDepthLevel;
14029
- for (Value *IncValue : Operands) {
14030
- unsigned Level = InitLevel;
14031
- if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14032
- ToDemote, DemotedConsts, Visited, Level,
14033
- IsProfitableToDemote, IsTruncRoot)) {
14034
- if (!IsProfitableToDemote)
14035
- return false;
14036
- NeedToExit = true;
14037
- if (!FinalAnalysis(ITE))
14038
- return false;
14039
- continue;
14040
- }
14041
- MaxDepthLevel = std::max(MaxDepthLevel, Level);
14042
- }
14043
- return true;
14044
- };
14045
- bool NeedToExit = false;
14046
14004
switch (I->getOpcode()) {
14047
14005
14048
14006
// We can always demote truncations and extensions. Since truncations can
@@ -14068,21 +14026,35 @@ bool BoUpSLP::collectValuesToDemote(
14068
14026
case Instruction::And:
14069
14027
case Instruction::Or:
14070
14028
case Instruction::Xor: {
14071
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14072
- return false;
14073
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14029
+ unsigned Level1, Level2;
14030
+ if ((ITE->UserTreeIndices.size() > 1 &&
14031
+ !IsPotentiallyTruncated(I, BitWidth)) ||
14032
+ !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14033
+ BitWidth, ToDemote, DemotedConsts, Visited,
14034
+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14035
+ !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14036
+ BitWidth, ToDemote, DemotedConsts, Visited,
14037
+ Level2, IsProfitableToDemote, IsTruncRoot))
14074
14038
return false;
14039
+ MaxDepthLevel = std::max(Level1, Level2);
14075
14040
break;
14076
14041
}
14077
14042
14078
14043
// We can demote selects if we can demote their true and false values.
14079
14044
case Instruction::Select: {
14080
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14081
- return false;
14082
14045
Start = 1;
14083
- auto *SI = cast<SelectInst>(I);
14084
- if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14046
+ unsigned Level1, Level2;
14047
+ SelectInst *SI = cast<SelectInst>(I);
14048
+ if ((ITE->UserTreeIndices.size() > 1 &&
14049
+ !IsPotentiallyTruncated(I, BitWidth)) ||
14050
+ !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14051
+ BitWidth, ToDemote, DemotedConsts, Visited,
14052
+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14053
+ !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14054
+ BitWidth, ToDemote, DemotedConsts, Visited,
14055
+ Level2, IsProfitableToDemote, IsTruncRoot))
14085
14056
return false;
14057
+ MaxDepthLevel = std::max(Level1, Level2);
14086
14058
break;
14087
14059
}
14088
14060
@@ -14093,20 +14065,22 @@ bool BoUpSLP::collectValuesToDemote(
14093
14065
MaxDepthLevel = 0;
14094
14066
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14095
14067
return false;
14096
- SmallVector<Value *> Ops(PN->incoming_values().begin(),
14097
- PN->incoming_values().end());
14098
- if (!ProcessOperands(Ops, NeedToExit))
14099
- return false;
14068
+ for (Value *IncValue : PN->incoming_values()) {
14069
+ unsigned Level;
14070
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14071
+ ToDemote, DemotedConsts, Visited, Level,
14072
+ IsProfitableToDemote, IsTruncRoot))
14073
+ return false;
14074
+ MaxDepthLevel = std::max(MaxDepthLevel, Level);
14075
+ }
14100
14076
break;
14101
14077
}
14102
14078
14103
14079
// Otherwise, conservatively give up.
14104
14080
default:
14105
14081
MaxDepthLevel = 1;
14106
- return FinalAnalysis( );
14082
+ return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth );
14107
14083
}
14108
- if (NeedToExit)
14109
- return true;
14110
14084
14111
14085
++MaxDepthLevel;
14112
14086
// Gather demoted constant operands.
@@ -14145,17 +14119,15 @@ void BoUpSLP::computeMinimumValueSizes() {
14145
14119
14146
14120
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
14147
14121
// resize to the final type.
14148
- bool IsTruncRoot = false;
14149
14122
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
14150
14123
if (NodeIdx != 0 &&
14151
14124
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14152
14125
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
14153
14126
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
14154
14127
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
14155
14128
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14156
- IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14157
- IsProfitableToDemoteRoot = true;
14158
14129
++NodeIdx;
14130
+ IsProfitableToDemoteRoot = true;
14159
14131
}
14160
14132
14161
14133
// Analyzed in reduction already and not profitable - exit.
@@ -14287,6 +14259,7 @@ void BoUpSLP::computeMinimumValueSizes() {
14287
14259
ReductionBitWidth = bit_ceil(ReductionBitWidth);
14288
14260
}
14289
14261
bool IsTopRoot = NodeIdx == 0;
14262
+ bool IsTruncRoot = false;
14290
14263
while (NodeIdx < VectorizableTree.size() &&
14291
14264
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14292
14265
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
0 commit comments