@@ -10226,11 +10226,9 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
10226
10226
for (const TreeEntry *TE : ForRemoval)
10227
10227
Set.erase(TE);
10228
10228
}
10229
- bool NeedToRemapValues = false;
10230
10229
for (auto *It = UsedTEs.begin(); It != UsedTEs.end();) {
10231
10230
if (It->empty()) {
10232
10231
UsedTEs.erase(It);
10233
- NeedToRemapValues = true;
10234
10232
continue;
10235
10233
}
10236
10234
std::advance(It, 1);
@@ -10239,19 +10237,6 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
10239
10237
Entries.clear();
10240
10238
return std::nullopt;
10241
10239
}
10242
- // Recalculate the mapping between the values and entries sets.
10243
- if (NeedToRemapValues) {
10244
- DenseMap<Value *, int> PrevUsedValuesEntry;
10245
- PrevUsedValuesEntry.swap(UsedValuesEntry);
10246
- for (auto [Idx, Set] : enumerate(UsedTEs)) {
10247
- DenseSet<Value *> Values;
10248
- for (const TreeEntry *E : Set)
10249
- Values.insert(E->Scalars.begin(), E->Scalars.end());
10250
- for (const auto &P : PrevUsedValuesEntry)
10251
- if (Values.contains(P.first))
10252
- UsedValuesEntry.try_emplace(P.first, Idx);
10253
- }
10254
- }
10255
10240
}
10256
10241
10257
10242
unsigned VF = 0;
@@ -14022,33 +14007,6 @@ bool BoUpSLP::collectValuesToDemote(
14022
14007
};
14023
14008
unsigned Start = 0;
14024
14009
unsigned End = I->getNumOperands();
14025
-
14026
- auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14027
- if (!IsProfitableToDemote)
14028
- return false;
14029
- return (ITE && ITE->UserTreeIndices.size() > 1) ||
14030
- IsPotentiallyTruncated(I, BitWidth);
14031
- };
14032
- auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14033
- NeedToExit = false;
14034
- unsigned InitLevel = MaxDepthLevel;
14035
- for (Value *IncValue : Operands) {
14036
- unsigned Level = InitLevel;
14037
- if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14038
- ToDemote, DemotedConsts, Visited, Level,
14039
- IsProfitableToDemote, IsTruncRoot)) {
14040
- if (!IsProfitableToDemote)
14041
- return false;
14042
- NeedToExit = true;
14043
- if (!FinalAnalysis(ITE))
14044
- return false;
14045
- continue;
14046
- }
14047
- MaxDepthLevel = std::max(MaxDepthLevel, Level);
14048
- }
14049
- return true;
14050
- };
14051
- bool NeedToExit = false;
14052
14010
switch (I->getOpcode()) {
14053
14011
14054
14012
// We can always demote truncations and extensions. Since truncations can
@@ -14074,21 +14032,35 @@ bool BoUpSLP::collectValuesToDemote(
14074
14032
case Instruction::And:
14075
14033
case Instruction::Or:
14076
14034
case Instruction::Xor: {
14077
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14078
- return false;
14079
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14035
+ unsigned Level1, Level2;
14036
+ if ((ITE->UserTreeIndices.size() > 1 &&
14037
+ !IsPotentiallyTruncated(I, BitWidth)) ||
14038
+ !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14039
+ BitWidth, ToDemote, DemotedConsts, Visited,
14040
+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14041
+ !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14042
+ BitWidth, ToDemote, DemotedConsts, Visited,
14043
+ Level2, IsProfitableToDemote, IsTruncRoot))
14080
14044
return false;
14045
+ MaxDepthLevel = std::max(Level1, Level2);
14081
14046
break;
14082
14047
}
14083
14048
14084
14049
// We can demote selects if we can demote their true and false values.
14085
14050
case Instruction::Select: {
14086
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14087
- return false;
14088
14051
Start = 1;
14089
- auto *SI = cast<SelectInst>(I);
14090
- if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14052
+ unsigned Level1, Level2;
14053
+ SelectInst *SI = cast<SelectInst>(I);
14054
+ if ((ITE->UserTreeIndices.size() > 1 &&
14055
+ !IsPotentiallyTruncated(I, BitWidth)) ||
14056
+ !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14057
+ BitWidth, ToDemote, DemotedConsts, Visited,
14058
+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14059
+ !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14060
+ BitWidth, ToDemote, DemotedConsts, Visited,
14061
+ Level2, IsProfitableToDemote, IsTruncRoot))
14091
14062
return false;
14063
+ MaxDepthLevel = std::max(Level1, Level2);
14092
14064
break;
14093
14065
}
14094
14066
@@ -14099,20 +14071,22 @@ bool BoUpSLP::collectValuesToDemote(
14099
14071
MaxDepthLevel = 0;
14100
14072
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14101
14073
return false;
14102
- SmallVector<Value *> Ops(PN->incoming_values().begin(),
14103
- PN->incoming_values().end());
14104
- if (!ProcessOperands(Ops, NeedToExit))
14105
- return false;
14074
+ for (Value *IncValue : PN->incoming_values()) {
14075
+ unsigned Level;
14076
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14077
+ ToDemote, DemotedConsts, Visited, Level,
14078
+ IsProfitableToDemote, IsTruncRoot))
14079
+ return false;
14080
+ MaxDepthLevel = std::max(MaxDepthLevel, Level);
14081
+ }
14106
14082
break;
14107
14083
}
14108
14084
14109
14085
// Otherwise, conservatively give up.
14110
14086
default:
14111
14087
MaxDepthLevel = 1;
14112
- return FinalAnalysis( );
14088
+ return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth );
14113
14089
}
14114
- if (NeedToExit)
14115
- return true;
14116
14090
14117
14091
++MaxDepthLevel;
14118
14092
// Gather demoted constant operands.
@@ -14151,17 +14125,15 @@ void BoUpSLP::computeMinimumValueSizes() {
14151
14125
14152
14126
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
14153
14127
// resize to the final type.
14154
- bool IsTruncRoot = false;
14155
14128
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
14156
14129
if (NodeIdx != 0 &&
14157
14130
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14158
14131
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
14159
14132
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
14160
14133
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
14161
14134
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14162
- IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14163
- IsProfitableToDemoteRoot = true;
14164
14135
++NodeIdx;
14136
+ IsProfitableToDemoteRoot = true;
14165
14137
}
14166
14138
14167
14139
// Analyzed in reduction already and not profitable - exit.
@@ -14293,6 +14265,7 @@ void BoUpSLP::computeMinimumValueSizes() {
14293
14265
ReductionBitWidth = bit_ceil(ReductionBitWidth);
14294
14266
}
14295
14267
bool IsTopRoot = NodeIdx == 0;
14268
+ bool IsTruncRoot = false;
14296
14269
while (NodeIdx < VectorizableTree.size() &&
14297
14270
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14298
14271
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
0 commit comments