@@ -12225,7 +12225,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
12225
12225
return E->VectorizedValue;
12226
12226
}
12227
12227
if (True->getType() != VecTy || False->getType() != VecTy) {
12228
- assert((getOperandEntry(E, 1)->State == TreeEntry::NeedToGather ||
12228
+ assert((It != MinBWs.end() ||
12229
+ getOperandEntry(E, 1)->State == TreeEntry::NeedToGather ||
12229
12230
getOperandEntry(E, 2)->State == TreeEntry::NeedToGather ||
12230
12231
MinBWs.contains(getOperandEntry(E, 1)) ||
12231
12232
MinBWs.contains(getOperandEntry(E, 2))) &&
@@ -12297,7 +12298,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
12297
12298
return E->VectorizedValue;
12298
12299
}
12299
12300
if (LHS->getType() != VecTy || RHS->getType() != VecTy) {
12300
- assert((getOperandEntry(E, 0)->State == TreeEntry::NeedToGather ||
12301
+ assert((It != MinBWs.end() ||
12302
+ getOperandEntry(E, 0)->State == TreeEntry::NeedToGather ||
12301
12303
getOperandEntry(E, 1)->State == TreeEntry::NeedToGather ||
12302
12304
MinBWs.contains(getOperandEntry(E, 0)) ||
12303
12305
MinBWs.contains(getOperandEntry(E, 1))) &&
@@ -12543,7 +12545,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
12543
12545
((Instruction::isBinaryOp(E->getOpcode()) &&
12544
12546
(LHS->getType() != VecTy || RHS->getType() != VecTy)) ||
12545
12547
(isa<CmpInst>(VL0) && LHS->getType() != RHS->getType()))) {
12546
- assert((getOperandEntry(E, 0)->State == TreeEntry::NeedToGather ||
12548
+ assert((It != MinBWs.end() ||
12549
+ getOperandEntry(E, 0)->State == TreeEntry::NeedToGather ||
12547
12550
getOperandEntry(E, 1)->State == TreeEntry::NeedToGather ||
12548
12551
MinBWs.contains(getOperandEntry(E, 0)) ||
12549
12552
MinBWs.contains(getOperandEntry(E, 1))) &&
@@ -12559,9 +12562,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
12559
12562
else
12560
12563
CastTy = LHS->getType();
12561
12564
}
12562
- if (LHS->getType() != VecTy )
12565
+ if (LHS->getType() != CastTy )
12563
12566
LHS = Builder.CreateIntCast(LHS, CastTy, GetOperandSignedness(0));
12564
- if (RHS->getType() != VecTy )
12567
+ if (RHS->getType() != CastTy )
12565
12568
RHS = Builder.CreateIntCast(RHS, CastTy, GetOperandSignedness(1));
12566
12569
}
12567
12570
@@ -13988,15 +13991,6 @@ bool BoUpSLP::collectValuesToDemote(
13988
13991
// If the value is not a vectorized instruction in the expression and not used
13989
13992
// by the insertelement instruction and not used in multiple vector nodes, it
13990
13993
// cannot be demoted.
13991
- // TODO: improve handling of gathered values and others.
13992
- auto *I = dyn_cast<Instruction>(V);
13993
- const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
13994
- if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
13995
- all_of(I->users(), [&](User *U) {
13996
- return isa<InsertElementInst>(U) && !getTreeEntry(U);
13997
- }))
13998
- return false;
13999
-
14000
13994
auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
14001
13995
if (MultiNodeScalars.contains(V))
14002
13996
return false;
@@ -14011,8 +14005,44 @@ bool BoUpSLP::collectValuesToDemote(
14011
14005
BitWidth = std::max(BitWidth, BitWidth1);
14012
14006
return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
14013
14007
};
14008
+ auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14009
+ if (!IsProfitableToDemote)
14010
+ return false;
14011
+ return (ITE && ITE->UserTreeIndices.size() > 1) ||
14012
+ IsPotentiallyTruncated(V, BitWidth);
14013
+ };
14014
+ // TODO: improve handling of gathered values and others.
14015
+ auto *I = dyn_cast<Instruction>(V);
14016
+ const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
14017
+ if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
14018
+ all_of(I->users(), [&](User *U) {
14019
+ return isa<InsertElementInst>(U) && !getTreeEntry(U);
14020
+ }))
14021
+ return FinalAnalysis();
14022
+
14014
14023
unsigned Start = 0;
14015
14024
unsigned End = I->getNumOperands();
14025
+
14026
+ auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14027
+ NeedToExit = false;
14028
+ unsigned InitLevel = MaxDepthLevel;
14029
+ for (Value *IncValue : Operands) {
14030
+ unsigned Level = InitLevel;
14031
+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14032
+ ToDemote, DemotedConsts, Visited, Level,
14033
+ IsProfitableToDemote, IsTruncRoot)) {
14034
+ if (!IsProfitableToDemote)
14035
+ return false;
14036
+ NeedToExit = true;
14037
+ if (!FinalAnalysis(ITE))
14038
+ return false;
14039
+ continue;
14040
+ }
14041
+ MaxDepthLevel = std::max(MaxDepthLevel, Level);
14042
+ }
14043
+ return true;
14044
+ };
14045
+ bool NeedToExit = false;
14016
14046
switch (I->getOpcode()) {
14017
14047
14018
14048
// We can always demote truncations and extensions. Since truncations can
@@ -14038,35 +14068,21 @@ bool BoUpSLP::collectValuesToDemote(
14038
14068
case Instruction::And:
14039
14069
case Instruction::Or:
14040
14070
case Instruction::Xor: {
14041
- unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14042
- if ((ITE->UserTreeIndices.size() > 1 &&
14043
- !IsPotentiallyTruncated(I, BitWidth)) ||
14044
- !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14045
- BitWidth, ToDemote, DemotedConsts, Visited,
14046
- Level1, IsProfitableToDemote, IsTruncRoot) ||
14047
- !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14048
- BitWidth, ToDemote, DemotedConsts, Visited,
14049
- Level2, IsProfitableToDemote, IsTruncRoot))
14071
+ if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14072
+ return false;
14073
+ if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14050
14074
return false;
14051
- MaxDepthLevel = std::max(Level1, Level2);
14052
14075
break;
14053
14076
}
14054
14077
14055
14078
// We can demote selects if we can demote their true and false values.
14056
14079
case Instruction::Select: {
14080
+ if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14081
+ return false;
14057
14082
Start = 1;
14058
- unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14059
- SelectInst *SI = cast<SelectInst>(I);
14060
- if ((ITE->UserTreeIndices.size() > 1 &&
14061
- !IsPotentiallyTruncated(I, BitWidth)) ||
14062
- !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14063
- BitWidth, ToDemote, DemotedConsts, Visited,
14064
- Level1, IsProfitableToDemote, IsTruncRoot) ||
14065
- !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14066
- BitWidth, ToDemote, DemotedConsts, Visited,
14067
- Level2, IsProfitableToDemote, IsTruncRoot))
14083
+ auto *SI = cast<SelectInst>(I);
14084
+ if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14068
14085
return false;
14069
- MaxDepthLevel = std::max(Level1, Level2);
14070
14086
break;
14071
14087
}
14072
14088
@@ -14076,23 +14092,20 @@ bool BoUpSLP::collectValuesToDemote(
14076
14092
PHINode *PN = cast<PHINode>(I);
14077
14093
if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14078
14094
return false;
14079
- unsigned InitLevel = MaxDepthLevel;
14080
- for (Value *IncValue : PN->incoming_values()) {
14081
- unsigned Level = InitLevel;
14082
- if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14083
- ToDemote, DemotedConsts, Visited, Level,
14084
- IsProfitableToDemote, IsTruncRoot))
14085
- return false;
14086
- MaxDepthLevel = std::max(MaxDepthLevel, Level);
14087
- }
14095
+ SmallVector<Value *> Ops(PN->incoming_values().begin(),
14096
+ PN->incoming_values().end());
14097
+ if (!ProcessOperands(Ops, NeedToExit))
14098
+ return false;
14088
14099
break;
14089
14100
}
14090
14101
14091
14102
// Otherwise, conservatively give up.
14092
14103
default:
14093
14104
MaxDepthLevel = 1;
14094
- return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth );
14105
+ return FinalAnalysis( );
14095
14106
}
14107
+ if (NeedToExit)
14108
+ return true;
14096
14109
14097
14110
++MaxDepthLevel;
14098
14111
// Gather demoted constant operands.
@@ -14131,15 +14144,17 @@ void BoUpSLP::computeMinimumValueSizes() {
14131
14144
14132
14145
// The first value node for store/insertelement is sext/zext/trunc? Skip it,
14133
14146
// resize to the final type.
14147
+ bool IsTruncRoot = false;
14134
14148
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
14135
14149
if (NodeIdx != 0 &&
14136
14150
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14137
14151
(VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
14138
14152
VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
14139
14153
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
14140
14154
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14141
- ++ NodeIdx;
14155
+ IsTruncRoot = VectorizableTree[ NodeIdx]->getOpcode() == Instruction::Trunc ;
14142
14156
IsProfitableToDemoteRoot = true;
14157
+ ++NodeIdx;
14143
14158
}
14144
14159
14145
14160
// Analyzed in reduction already and not profitable - exit.
@@ -14271,7 +14286,6 @@ void BoUpSLP::computeMinimumValueSizes() {
14271
14286
ReductionBitWidth = bit_ceil(ReductionBitWidth);
14272
14287
}
14273
14288
bool IsTopRoot = NodeIdx == 0;
14274
- bool IsTruncRoot = false;
14275
14289
while (NodeIdx < VectorizableTree.size() &&
14276
14290
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
14277
14291
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
0 commit comments