@@ -10842,17 +10842,22 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10842
10842
// Leave the scalar instructions as is if they are cheaper than extracts.
10843
10843
if (Entry->Idx != 0 || Entry->getOpcode() == Instruction::GetElementPtr ||
10844
10844
Entry->getOpcode() == Instruction::Load) {
10845
+ // Returns true if the user of the external scalar U is a PHI node whose block belongs to a loop and the scalar is defined either in that same block or in a block for which LI reports the same loop; returns false otherwise.
10846
+ auto IsPhiInLoop = [&](const ExternalUser &U) {
10847
+ if (auto *Phi = dyn_cast_if_present<PHINode>(U.User)) { // only PHI users are considered
10848
+ auto *I = cast<Instruction>(U.Scalar); // the externally used scalar, treated as an instruction
10849
+ const Loop *L = LI->getLoopFor(Phi->getParent()); // loop reported for the PHI's block; a null result fails the check below
10850
+ return L && (Phi->getParent() == I->getParent() ||
10851
+ L == LI->getLoopFor(I->getParent()));
10852
+ }
10853
+ return false; // the user is not a PHI node
10854
+ };
10845
10855
if (!ValueToExtUses) {
10846
10856
ValueToExtUses.emplace();
10847
10857
for_each(enumerate(ExternalUses), [&](const auto &P) {
10848
10858
// Ignore phis in loops.
10849
- if (auto *Phi = dyn_cast_if_present<PHINode>(P.value().User)) {
10850
- auto *I = cast<Instruction>(P.value().Scalar);
10851
- const Loop *L = LI->getLoopFor(Phi->getParent());
10852
- if (L && (Phi->getParent() == I->getParent() ||
10853
- L == LI->getLoopFor(I->getParent())))
10854
- return;
10855
- }
10859
+ if (IsPhiInLoop(P.value()))
10860
+ return;
10856
10861
10857
10862
ValueToExtUses->try_emplace(P.value().Scalar, P.index());
10858
10863
});
@@ -10903,8 +10908,12 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10903
10908
return ValueToExtUses->contains(V);
10904
10909
});
10905
10910
auto It = ExtractsCount.find(Entry);
10906
- if (It != ExtractsCount.end())
10911
+ if (It != ExtractsCount.end()) {
10912
+ assert(ScalarUsesCount >= It->getSecond().size() &&
10913
+ "Expected total number of external uses not less than "
10914
+ "number of scalar uses.");
10907
10915
ScalarUsesCount -= It->getSecond().size();
10916
+ }
10908
10917
// Keep original scalar if number of externally used instructions in
10909
10918
// the same entry is not a power of 2. It may help to do some extra
10910
10919
// vectorization for now.
@@ -10920,7 +10929,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
10920
10929
}
10921
10930
});
10922
10931
ExtraCost = ScalarCost;
10923
- ExtractsCount[Entry].insert(Inst);
10932
+ if (!IsPhiInLoop(EU))
10933
+ ExtractsCount[Entry].insert(Inst);
10924
10934
}
10925
10935
}
10926
10936
}
0 commit comments