@@ -10225,9 +10225,11 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
10225
10225
for (const TreeEntry *TE : ForRemoval)
10226
10226
Set.erase(TE);
10227
10227
}
10228
+ bool NeedToRemapValues = false;
10228
10229
for (auto *It = UsedTEs.begin(); It != UsedTEs.end();) {
10229
10230
if (It->empty()) {
10230
10231
UsedTEs.erase(It);
10232
+ NeedToRemapValues = true;
10231
10233
continue;
10232
10234
}
10233
10235
std::advance(It, 1);
@@ -10236,6 +10238,19 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
10236
10238
Entries.clear();
10237
10239
return std::nullopt;
10238
10240
}
10241
+ // Recalculate the mapping between the values and entries sets.
10242
+ if (NeedToRemapValues) {
10243
+ DenseMap<Value *, int> PrevUsedValuesEntry;
10244
+ PrevUsedValuesEntry.swap(UsedValuesEntry);
10245
+ for (auto [Idx, Set] : enumerate(UsedTEs)) {
10246
+ DenseSet<Value *> Values;
10247
+ for (const TreeEntry *E : Set)
10248
+ Values.insert(E->Scalars.begin(), E->Scalars.end());
10249
+ for (const auto &P : PrevUsedValuesEntry)
10250
+ if (Values.contains(P.first))
10251
+ UsedValuesEntry.try_emplace(P.first, Idx);
10252
+ }
10253
+ }
10239
10254
}
10240
10255
10241
10256
unsigned VF = 0;
@@ -14001,6 +14016,33 @@ bool BoUpSLP::collectValuesToDemote(
14001
14016
};
14002
14017
unsigned Start = 0;
14003
14018
unsigned End = I->getNumOperands();
14019
+
14020
+ auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) { // Fallback decision; ITE is optional and defaults to nullptr.
14021
+ if (!IsProfitableToDemote) // Always answers false when the IsProfitableToDemote flag is not set.
14022
+ return false;
14023
+ return (ITE && ITE->UserTreeIndices.size() > 1) || // True when an entry was supplied and it has more than one UserTreeIndices element...
14024
+ IsPotentiallyTruncated(I, BitWidth); // ...or when IsPotentiallyTruncated accepts I at BitWidth.
14025
+ };
14026
+ auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) { // Recurses into each operand; returns false to reject, true otherwise, and reports via NeedToExit whether any operand failed.
14027
+ NeedToExit = false; // Out-parameter is reset on every call.
14028
+ unsigned InitLevel = MaxDepthLevel; // Snapshot of the depth on entry; every operand starts from this same value.
14029
+ for (Value *IncValue : Operands) {
14030
+ unsigned Level = InitLevel; // Per-operand depth handed to the recursive call and read back below.
14031
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14032
+ ToDemote, DemotedConsts, Visited, Level,
14033
+ IsProfitableToDemote, IsTruncRoot)) {
14034
+ if (!IsProfitableToDemote) // A failed operand is fatal when the profitability flag is not set.
14035
+ return false;
14036
+ NeedToExit = true; // Record the failure; note it stays set even if the next check returns false.
14037
+ if (!FinalAnalysis(ITE)) // Failed operand is tolerated only if the fallback check passes for this entry.
14038
+ return false;
14039
+ continue; // Keep visiting the remaining operands; MaxDepthLevel is not updated for a failed one.
14040
+ }
14041
+ MaxDepthLevel = std::max(MaxDepthLevel, Level); // Track the largest depth among operands that succeeded.
14042
+ }
14043
+ return true;
14044
+ };
14045
+ bool NeedToExit = false;
14004
14046
switch (I->getOpcode()) {
14005
14047
14006
14048
// We can always demote truncations and extensions. Since truncations can
@@ -14026,35 +14068,21 @@ bool BoUpSLP::collectValuesToDemote(
14026
14068
case Instruction::And:
14027
14069
case Instruction::Or:
14028
14070
case Instruction::Xor: {
14029
- unsigned Level1, Level2;
14030
- if ((ITE->UserTreeIndices.size() > 1 &&
14031
- !IsPotentiallyTruncated(I, BitWidth)) ||
14032
- !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14033
- BitWidth, ToDemote, DemotedConsts, Visited,
14034
- Level1, IsProfitableToDemote, IsTruncRoot) ||
14035
- !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14036
- BitWidth, ToDemote, DemotedConsts, Visited,
14037
- Level2, IsProfitableToDemote, IsTruncRoot))
14071
+ if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14072
+ return false;
14073
+ if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14038
14074
return false;
14039
- MaxDepthLevel = std::max(Level1, Level2);
14040
14075
break;
14041
14076
}
14042
14077
14043
14078
// We can demote selects if we can demote their true and false values.
14044
14079
case Instruction::Select: {
14080
+ if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14081
+ return false;
14045
14082
Start = 1;
14046
- unsigned Level1, Level2;
14047
- SelectInst *SI = cast<SelectInst>(I);
14048
- if ((ITE->UserTreeIndices.size() > 1 &&
14049
- !IsPotentiallyTruncated(I, BitWidth)) ||
14050
- !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14051
- BitWidth, ToDemote, DemotedConsts, Visited,
14052
- Level1, IsProfitableToDemote, IsTruncRoot) ||
14053
- !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14054
- BitWidth, ToDemote, DemotedConsts, Visited,
14055
- Level2, IsProfitableToDemote, IsTruncRoot))
14083
+ auto *SI = cast<SelectInst>(I);
14084
+ if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14056
14085
return false;
14057
- MaxDepthLevel = std::max(Level1, Level2);
14058
14086
break;
14059
14087
}
14060
14088
@@ -14065,22 +14093,20 @@ bool BoUpSLP::collectValuesToDemote(
14065
14093
MaxDepthLevel = 0;
14066
14094
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14067
14095
return false;
14068
- for (Value *IncValue : PN->incoming_values()) {
14069
- unsigned Level;
14070
- if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14071
- ToDemote, DemotedConsts, Visited, Level,
14072
- IsProfitableToDemote, IsTruncRoot))
14073
- return false;
14074
- MaxDepthLevel = std::max(MaxDepthLevel, Level);
14075
- }
14096
+ SmallVector<Value *> Ops(PN->incoming_values().begin(),
14097
+ PN->incoming_values().end());
14098
+ if (!ProcessOperands(Ops, NeedToExit))
14099
+ return false;
14076
14100
break;
14077
14101
}
14078
14102
14079
14103
// Otherwise, conservatively give up.
14080
14104
default:
14081
14105
MaxDepthLevel = 1;
14082
- return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth );
14106
+ return FinalAnalysis( );
14083
14107
}
14108
+ if (NeedToExit)
14109
+ return true;
14084
14110
14085
14111
++MaxDepthLevel;
14086
14112
// Gather demoted constant operands.
@@ -14119,15 +14145,17 @@ void BoUpSLP::computeMinimumValueSizes() {
14119
14145
14120
14146
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
14121
14147
// resize to the final type.
14148
+ bool IsTruncRoot = false;
14122
14149
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
14123
14150
if (NodeIdx != 0 &&
14124
14151
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14125
14152
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
14126
14153
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
14127
14154
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
14128
14155
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14129
- ++ NodeIdx;
14156
+ IsTruncRoot = VectorizableTree[ NodeIdx]->getOpcode() == Instruction::Trunc ;
14130
14157
IsProfitableToDemoteRoot = true;
14158
+ ++NodeIdx;
14131
14159
}
14132
14160
14133
14161
// Analyzed in reduction already and not profitable - exit.
@@ -14259,7 +14287,6 @@ void BoUpSLP::computeMinimumValueSizes() {
14259
14287
ReductionBitWidth = bit_ceil(ReductionBitWidth);
14260
14288
}
14261
14289
bool IsTopRoot = NodeIdx == 0;
14262
- bool IsTruncRoot = false;
14263
14290
while (NodeIdx < VectorizableTree.size() &&
14264
14291
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14265
14292
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
0 commit comments