@@ -13988,6 +13988,15 @@ bool BoUpSLP::collectValuesToDemote(
13988
13988
// If the value is not a vectorized instruction in the expression and not used
13989
13989
// by the insertelement instruction and not used in multiple vector nodes, it
13990
13990
// cannot be demoted.
13991
+ // TODO: improve handling of gathered values and others.
13992
+ auto *I = dyn_cast<Instruction>(V);
13993
+ const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
13994
+ if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
13995
+ all_of(I->users(), [&](User *U) {
13996
+ return isa<InsertElementInst>(U) && !getTreeEntry(U);
13997
+ }))
13998
+ return false;
13999
+
13991
14000
auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
13992
14001
if (MultiNodeScalars.contains(V))
13993
14002
return false;
@@ -14002,44 +14011,8 @@ bool BoUpSLP::collectValuesToDemote(
14002
14011
BitWidth = std::max(BitWidth, BitWidth1);
14003
14012
return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
14004
14013
};
14005
- auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14006
- if (!IsProfitableToDemote)
14007
- return false;
14008
- return (ITE && ITE->UserTreeIndices.size() > 1) ||
14009
- IsPotentiallyTruncated(V, BitWidth);
14010
- };
14011
- // TODO: improve handling of gathered values and others.
14012
- auto *I = dyn_cast<Instruction>(V);
14013
- const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
14014
- if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
14015
- all_of(I->users(), [&](User *U) {
14016
- return isa<InsertElementInst>(U) && !getTreeEntry(U);
14017
- }))
14018
- return FinalAnalysis();
14019
-
14020
14014
unsigned Start = 0;
14021
14015
unsigned End = I->getNumOperands();
14022
-
14023
- auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14024
- NeedToExit = false;
14025
- unsigned InitLevel = MaxDepthLevel;
14026
- for (Value *IncValue : Operands) {
14027
- unsigned Level = InitLevel;
14028
- if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14029
- ToDemote, DemotedConsts, Visited, Level,
14030
- IsProfitableToDemote, IsTruncRoot)) {
14031
- if (!IsProfitableToDemote)
14032
- return false;
14033
- NeedToExit = true;
14034
- if (!FinalAnalysis(ITE))
14035
- return false;
14036
- continue;
14037
- }
14038
- MaxDepthLevel = std::max(MaxDepthLevel, Level);
14039
- }
14040
- return true;
14041
- };
14042
- bool NeedToExit = false;
14043
14016
switch (I->getOpcode()) {
14044
14017
14045
14018
// We can always demote truncations and extensions. Since truncations can
@@ -14065,21 +14038,35 @@ bool BoUpSLP::collectValuesToDemote(
14065
14038
case Instruction::And:
14066
14039
case Instruction::Or:
14067
14040
case Instruction::Xor: {
14068
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14069
- return false;
14070
- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14041
+ unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14042
+ if ((ITE->UserTreeIndices.size() > 1 &&
14043
+ !IsPotentiallyTruncated(I, BitWidth)) ||
14044
+ !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14045
+ BitWidth, ToDemote, DemotedConsts, Visited,
14046
+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14047
+ !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14048
+ BitWidth, ToDemote, DemotedConsts, Visited,
14049
+ Level2, IsProfitableToDemote, IsTruncRoot))
14071
14050
return false;
14051
+ MaxDepthLevel = std::max(Level1, Level2);
14072
14052
break;
14073
14053
}
14074
14054
14075
14055
// We can demote selects if we can demote their true and false values.
14076
14056
case Instruction::Select: {
14077
- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14078
- return false;
14079
14057
Start = 1;
14080
- auto *SI = cast<SelectInst>(I);
14081
- if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14058
+ unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14059
+ SelectInst *SI = cast<SelectInst>(I);
14060
+ if ((ITE->UserTreeIndices.size() > 1 &&
14061
+ !IsPotentiallyTruncated(I, BitWidth)) ||
14062
+ !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14063
+ BitWidth, ToDemote, DemotedConsts, Visited,
14064
+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14065
+ !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14066
+ BitWidth, ToDemote, DemotedConsts, Visited,
14067
+ Level2, IsProfitableToDemote, IsTruncRoot))
14082
14068
return false;
14069
+ MaxDepthLevel = std::max(Level1, Level2);
14083
14070
break;
14084
14071
}
14085
14072
@@ -14089,20 +14076,23 @@ bool BoUpSLP::collectValuesToDemote(
14089
14076
PHINode *PN = cast<PHINode>(I);
14090
14077
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14091
14078
return false;
14092
- SmallVector<Value *> Ops(PN->incoming_values().begin(),
14093
- PN->incoming_values().end());
14094
- if (!ProcessOperands(Ops, NeedToExit))
14095
- return false;
14079
+ unsigned InitLevel = MaxDepthLevel;
14080
+ for (Value *IncValue : PN->incoming_values()) {
14081
+ unsigned Level = InitLevel;
14082
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14083
+ ToDemote, DemotedConsts, Visited, Level,
14084
+ IsProfitableToDemote, IsTruncRoot))
14085
+ return false;
14086
+ MaxDepthLevel = std::max(MaxDepthLevel, Level);
14087
+ }
14096
14088
break;
14097
14089
}
14098
14090
14099
14091
// Otherwise, conservatively give up.
14100
14092
default:
14101
14093
MaxDepthLevel = 1;
14102
- return FinalAnalysis( );
14094
+ return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth );
14103
14095
}
14104
- if (NeedToExit)
14105
- return true;
14106
14096
14107
14097
++MaxDepthLevel;
14108
14098
// Gather demoted constant operands.
@@ -14141,17 +14131,15 @@ void BoUpSLP::computeMinimumValueSizes() {
14141
14131
14142
14132
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
14143
14133
// resize to the final type.
14144
- bool IsTruncRoot = false;
14145
14134
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
14146
14135
if (NodeIdx != 0 &&
14147
14136
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14148
14137
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
14149
14138
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
14150
14139
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
14151
14140
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14152
- IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14153
- IsProfitableToDemoteRoot = true;
14154
14141
++NodeIdx;
14142
+ IsProfitableToDemoteRoot = true;
14155
14143
}
14156
14144
14157
14145
// Analyzed in reduction already and not profitable - exit.
@@ -14283,6 +14271,7 @@ void BoUpSLP::computeMinimumValueSizes() {
14283
14271
ReductionBitWidth = bit_ceil(ReductionBitWidth);
14284
14272
}
14285
14273
bool IsTopRoot = NodeIdx == 0;
14274
+ bool IsTruncRoot = false;
14286
14275
while (NodeIdx < VectorizableTree.size() &&
14287
14276
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14288
14277
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
0 commit comments