@@ -13987,15 +13987,6 @@ bool BoUpSLP::collectValuesToDemote(
13987
13987
// If the value is not a vectorized instruction in the expression and not used
13988
13988
// by the insertelement instruction and not used in multiple vector nodes, it
13989
13989
// cannot be demoted.
13990
- // TODO: improve handling of gathered values and others.
13991
- auto *I = dyn_cast<Instruction>(V);
13992
- const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
13993
- if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
13994
- all_of(I->users(), [&](User *U) {
13995
- return isa<InsertElementInst>(U) && !getTreeEntry(U);
13996
- }))
13997
- return false;
13998
-
13999
13990
auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
14000
13991
if (MultiNodeScalars.contains(V))
14001
13992
return false;
@@ -14010,8 +14001,44 @@ bool BoUpSLP::collectValuesToDemote(
14010
14001
BitWidth = std::max(BitWidth, BitWidth1);
14011
14002
return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
14012
14003
};
14004
+ auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) { // Fallback verdict for V, used where the regular analysis bails out; ITE is optional and defaults to nullptr.
14005
+ if (!IsProfitableToDemote) // Nothing to salvage when demotion is not considered profitable.
14006
+ return false;
14007
+ return (ITE && ITE->UserTreeIndices.size() > 1) || // True if an entry was supplied and its UserTreeIndices holds more than one element (short-circuits, so BitWidth is left untouched)...
14008
+ IsPotentiallyTruncated(V, BitWidth); // ...otherwise defer to IsPotentiallyTruncated, which takes BitWidth by reference and may raise it.
14009
+ };
14010
+ // TODO: improve handling of gathered values and others.
14011
+ auto *I = dyn_cast<Instruction>(V);
14012
+ const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
14013
+ if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
14014
+ all_of(I->users(), [&](User *U) {
14015
+ return isa<InsertElementInst>(U) && !getTreeEntry(U);
14016
+ }))
14017
+ return FinalAnalysis();
14018
+
14013
14019
unsigned Start = 0;
14014
14020
unsigned End = I->getNumOperands();
14021
+
14022
+ auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) { // Recurses into each operand; returns false if the analysis must fail, true otherwise. NeedToExit is an out-parameter.
14023
+ NeedToExit = false; // Always reset; set to true below only if some operand could not be demoted.
14024
+ unsigned InitLevel = MaxDepthLevel; // Snapshot so every operand starts from the same depth.
14025
+ for (Value *IncValue : Operands) {
14026
+ unsigned Level = InitLevel; // Per-operand depth, passed to the recursive call.
14027
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14028
+ ToDemote, DemotedConsts, Visited, Level,
14029
+ IsProfitableToDemote, IsTruncRoot)) {
14030
+ if (!IsProfitableToDemote) // Operand failed and demotion is not profitable: hard failure.
14031
+ return false;
14032
+ NeedToExit = true; // Record the failure for the caller before consulting the fallback.
14033
+ if (!FinalAnalysis(ITE)) // Fallback on the current value (V/ITE), not on the failing operand.
14034
+ return false;
14035
+ continue; // Fallback accepted: skip the depth update but keep visiting the remaining operands.
14036
+ }
14037
+ MaxDepthLevel = std::max(MaxDepthLevel, Level); // Keep the deepest level seen among successfully analyzed operands.
14038
+ }
14039
+ return true; // No hard failure; the caller must still inspect NeedToExit.
14040
+ };
14041
+ bool NeedToExit = false;
14015
14042
switch (I->getOpcode()) {
14016
14043
14017
14044
// We can always demote truncations and extensions. Since truncations can
@@ -14037,35 +14064,21 @@ bool BoUpSLP::collectValuesToDemote(
14037
14064
case Instruction::And:
14038
14065
case Instruction::Or:
14039
14066
case Instruction::Xor: {
14040
- unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14041
- if ((ITE->UserTreeIndices.size() > 1 &&
14042
- !IsPotentiallyTruncated(I, BitWidth)) ||
14043
- !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14044
- BitWidth, ToDemote, DemotedConsts, Visited,
14045
- Level1, IsProfitableToDemote, IsTruncRoot) ||
14046
- !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14047
- BitWidth, ToDemote, DemotedConsts, Visited,
14048
- Level2, IsProfitableToDemote, IsTruncRoot))
14067
+ if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14068
+ return false;
14069
+ if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14049
14070
return false;
14050
- MaxDepthLevel = std::max(Level1, Level2);
14051
14071
break;
14052
14072
}
14053
14073
14054
14074
// We can demote selects if we can demote their true and false values.
14055
14075
case Instruction::Select: {
14076
+ if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14077
+ return false;
14056
14078
Start = 1;
14057
- unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14058
- SelectInst *SI = cast<SelectInst>(I);
14059
- if ((ITE->UserTreeIndices.size() > 1 &&
14060
- !IsPotentiallyTruncated(I, BitWidth)) ||
14061
- !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14062
- BitWidth, ToDemote, DemotedConsts, Visited,
14063
- Level1, IsProfitableToDemote, IsTruncRoot) ||
14064
- !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14065
- BitWidth, ToDemote, DemotedConsts, Visited,
14066
- Level2, IsProfitableToDemote, IsTruncRoot))
14079
+ auto *SI = cast<SelectInst>(I);
14080
+ if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14067
14081
return false;
14068
- MaxDepthLevel = std::max(Level1, Level2);
14069
14082
break;
14070
14083
}
14071
14084
@@ -14075,23 +14088,20 @@ bool BoUpSLP::collectValuesToDemote(
14075
14088
PHINode *PN = cast<PHINode>(I);
14076
14089
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14077
14090
return false;
14078
- unsigned InitLevel = MaxDepthLevel;
14079
- for (Value *IncValue : PN->incoming_values()) {
14080
- unsigned Level = InitLevel;
14081
- if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14082
- ToDemote, DemotedConsts, Visited, Level,
14083
- IsProfitableToDemote, IsTruncRoot))
14084
- return false;
14085
- MaxDepthLevel = std::max(MaxDepthLevel, Level);
14086
- }
14091
+ SmallVector<Value *> Ops(PN->incoming_values().begin(),
14092
+ PN->incoming_values().end());
14093
+ if (!ProcessOperands(Ops, NeedToExit))
14094
+ return false;
14087
14095
break;
14088
14096
}
14089
14097
14090
14098
// Otherwise, conservatively give up.
14091
14099
default:
14092
14100
MaxDepthLevel = 1;
14093
- return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth );
14101
+ return FinalAnalysis( );
14094
14102
}
14103
+ if (NeedToExit)
14104
+ return true;
14095
14105
14096
14106
++MaxDepthLevel;
14097
14107
// Gather demoted constant operands.
@@ -14130,15 +14140,17 @@ void BoUpSLP::computeMinimumValueSizes() {
14130
14140
14131
14141
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
14132
14142
// resize to the final type.
14143
+ bool IsTruncRoot = false;
14133
14144
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
14134
14145
if (NodeIdx != 0 &&
14135
14146
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14136
14147
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
14137
14148
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
14138
14149
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
14139
14150
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14140
- ++ NodeIdx;
14151
+ IsTruncRoot = VectorizableTree[ NodeIdx]->getOpcode() == Instruction::Trunc ;
14141
14152
IsProfitableToDemoteRoot = true;
14153
+ ++NodeIdx;
14142
14154
}
14143
14155
14144
14156
// Analyzed in reduction already and not profitable - exit.
@@ -14270,7 +14282,6 @@ void BoUpSLP::computeMinimumValueSizes() {
14270
14282
ReductionBitWidth = bit_ceil(ReductionBitWidth);
14271
14283
}
14272
14284
bool IsTopRoot = NodeIdx == 0;
14273
- bool IsTruncRoot = false;
14274
14285
while (NodeIdx < VectorizableTree.size() &&
14275
14286
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14276
14287
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
0 commit comments