-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "[SLP]Support revectorization of the previously vectorized scalars" #134604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…lars" This reverts commit 0e3049c.
@llvm/pr-subscribers-llvm-transforms Author: Alexander Kornienko (alexfh) Changes: Reverts llvm/llvm-project#133091 due to crashes. See #133091 (comment) for the report. Patch is 35.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134604.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e2031df810573..b58cf19d9b28a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3783,6 +3783,11 @@ class BoUpSLP {
if (isa<PoisonValue>(V))
continue;
auto It = ScalarToTreeEntries.find(V);
+ assert(
+ (It == ScalarToTreeEntries.end() ||
+ (It->getSecond().size() == 1 && It->getSecond().front() == Last) ||
+ doesNotNeedToBeScheduled(V)) &&
+ "Scalar already in tree!");
if (It == ScalarToTreeEntries.end()) {
ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
(void)Processed.insert(V);
@@ -4042,9 +4047,6 @@ class BoUpSLP {
private:
/// Used for getting a "good" final ordering of instructions.
int SchedulingPriority = 0;
- /// True if this instruction (or bundle) is scheduled (or considered as
- /// scheduled in the dry-run).
- bool IsScheduled = false;
/// The kind of the ScheduleEntity.
const Kind K = Kind::ScheduleData;
@@ -4058,10 +4060,6 @@ class BoUpSLP {
return SD->isReady();
return cast<ScheduleBundle>(this)->isReady();
}
- /// Gets/sets if the bundle is scheduled.
- bool isScheduled() const { return IsScheduled; }
- void setScheduled(bool Scheduled) { IsScheduled = Scheduled; }
-
static bool classof(const ScheduleEntity *) { return true; }
};
@@ -4134,6 +4132,10 @@ class BoUpSLP {
IsScheduled = false;
}
+ /// Gets/sets if the bundle is scheduled.
+ bool isScheduled() const { return IsScheduled; }
+ void setScheduled(bool Scheduled) { IsScheduled = Scheduled; }
+
/// Gets the number of unscheduled dependencies.
int getUnscheduledDeps() const { return UnscheduledDeps; }
/// Gets the number of dependencies.
@@ -4208,6 +4210,10 @@ class BoUpSLP {
/// for scheduling.
/// Note that this is negative as long as Dependencies is not calculated.
int UnscheduledDeps = InvalidDeps;
+
+ /// True if this instruction is scheduled (or considered as scheduled in the
+ /// dry-run).
+ bool IsScheduled = false;
};
#ifndef NDEBUG
@@ -4252,6 +4258,11 @@ class BoUpSLP {
}
}
+ bool isScheduled() const {
+ return all_of(Bundle,
+ [](const ScheduleData *SD) { return SD->isScheduled(); });
+ }
+
/// Returns the number of unscheduled dependencies in the bundle.
int unscheduledDepsInBundle() const {
assert(*this && "bundle must not be empty");
@@ -4508,22 +4519,12 @@ class BoUpSLP {
ProcessBundleMember(SD, nullptr);
} else {
ScheduleBundle &Bundle = *cast<ScheduleBundle>(Data);
- Bundle.setScheduled(/*Scheduled=*/true);
+ for_each(Bundle.getBundle(), [](ScheduleData *SD) {
+ SD->setScheduled(/*Scheduled=*/true);
+ });
LLVM_DEBUG(dbgs() << "SLP: schedule " << Bundle << "\n");
- auto AreAllBundlesScheduled = [&](const ScheduleData *SD) {
- ArrayRef<ScheduleBundle *> SDBundles =
- getScheduleBundles(SD->getInst());
- return !SDBundles.empty() &&
- all_of(SDBundles, [&](const ScheduleBundle *SDBundle) {
- return SDBundle->isScheduled();
- });
- };
- for (ScheduleData *SD : Bundle.getBundle()) {
- if (AreAllBundlesScheduled(SD)) {
- SD->setScheduled(/*Scheduled=*/true);
- ProcessBundleMember(SD, &Bundle);
- }
- }
+ for (ScheduleData *SD : Bundle.getBundle())
+ ProcessBundleMember(SD, &Bundle);
}
}
@@ -4554,11 +4555,10 @@ class BoUpSLP {
SD->verify();
}
- assert(all_of(ReadyInsts,
- [](const ScheduleEntity *Bundle) {
- return Bundle->isReady();
- }) &&
- "item in ready list not ready?");
+ for (const ScheduleEntity *Bundle : ReadyInsts) {
+ assert(Bundle->isReady() && "item in ready list not ready?");
+ (void)Bundle;
+ }
}
/// Put all instructions into the ReadyList which are ready for scheduling.
@@ -7440,7 +7440,7 @@ void BoUpSLP::buildExternalUses(
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in FoundLane will
// be used.
- if (all_of(UseEntries, [&](TreeEntry *UseEntry) {
+ if (any_of(UseEntries, [&](TreeEntry *UseEntry) {
return UseEntry->State == TreeEntry::ScatterVectorize ||
!doesInTreeUserNeedToExtract(
Scalar, getRootEntryInstruction(*UseEntry), TLI,
@@ -9488,47 +9488,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// We now know that this is a vector of instructions of the same type from
// the same block.
- // Check that none of the instructions in the bundle are already in the tree
- // and the node may be not profitable for the vectorization as the small
- // alternate node.
- if (S && S.isAltShuffle()) {
- auto GetNumVectorizedExtracted = [&]() {
- APInt Extracted = APInt::getZero(VL.size());
- APInt Vectorized = APInt::getAllOnes(VL.size());
- for (auto [Idx, V] : enumerate(VL)) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I || doesNotNeedToBeScheduled(I) ||
- all_of(I->operands(), [&](const Use &U) {
- return isa<ExtractElementInst>(U.get());
- }))
- continue;
- if (isVectorized(I))
- Vectorized.clearBit(Idx);
- else if (!I->hasOneUser() && !areAllUsersVectorized(I, UserIgnoreList))
- Extracted.setBit(Idx);
- }
- return std::make_pair(Vectorized, Extracted);
- };
- auto [Vectorized, Extracted] = GetNumVectorizedExtracted();
- constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
- bool PreferScalarize = !Vectorized.isAllOnes() && VL.size() == 2;
- if (!Vectorized.isAllOnes() && !PreferScalarize) {
- // Rough cost estimation, if the vector code (+ potential extracts) is
- // more profitable than the scalar + buildvector.
- Type *ScalarTy = VL.front()->getType();
- auto *VecTy = getWidenedType(ScalarTy, VL.size());
- InstructionCost VectorizeCostEstimate =
- ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy, {}, Kind) +
- ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Extracted,
- /*Insert=*/false, /*Extract=*/true, Kind);
- InstructionCost ScalarizeCostEstimate =
- ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Vectorized,
- /*Insert=*/true, /*Extract=*/false, Kind);
- PreferScalarize = VectorizeCostEstimate > ScalarizeCostEstimate;
- }
- if (PreferScalarize) {
- LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
- "node is not profitable.\n");
+ // Check that none of the instructions in the bundle are already in the tree.
+ for (Value *V : VL) {
+ if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
+ doesNotNeedToBeScheduled(V))
+ continue;
+ if (isVectorized(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
+ << ") is already in tree.\n");
if (TryToFindDuplicates(S)) {
auto Invalid = ScheduleBundle::invalid();
newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
@@ -9617,6 +9584,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
#endif
if (!BundlePtr || (*BundlePtr && !*BundlePtr.value())) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
+ assert((!BS.getScheduleData(VL0) || BS.getScheduleBundles(VL0).empty()) &&
+ "tryScheduleBundle should not create bundle on failure");
// Last chance to try to vectorize alternate node.
if (S.isAltShuffle() && ReuseShuffleIndices.empty() &&
TrySplitNode(SmallNodeSize, S))
@@ -12405,7 +12374,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallBitVector UsedScalars(Sz, false);
for (unsigned I = 0; I < Sz; ++I) {
if (isa<Instruction>(UniqueValues[I]) &&
- getTreeEntries(UniqueValues[I]).front() == E)
+ is_contained(getTreeEntries(UniqueValues[I]), E))
continue;
UsedScalars.set(I);
}
@@ -13975,7 +13944,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
for (ExternalUser &EU : ExternalUses) {
ScalarUserAndIdx.emplace_back(EU.Scalar, EU.User, EU.Lane);
}
- SmallDenseSet<std::pair<Value *, Value *>, 8> CheckedScalarUser;
for (ExternalUser &EU : ExternalUses) {
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
@@ -13983,12 +13951,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
if (EphValues.count(EU.User))
continue;
- // Check if the scalar for the given user or all users is accounted already.
- if (!CheckedScalarUser.insert(std::make_pair(EU.Scalar, EU.User)).second ||
- (EU.User &&
- CheckedScalarUser.contains(std::make_pair(EU.Scalar, nullptr))))
- continue;
-
// Used in unreachable blocks or in EH pads (rarely executed) or is
// terminated with unreachable instruction.
if (BasicBlock *UserParent =
@@ -14691,16 +14653,10 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
PHINode *UserPHI = UseEI.UserTE->State != TreeEntry::SplitVectorize
? dyn_cast<PHINode>(UseEI.UserTE->getMainOp())
: nullptr;
- Instruction *InsertPt =
+ const Instruction *InsertPt =
UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
: &getLastInstructionInBundle(UseEI.UserTE);
if (TEInsertPt == InsertPt) {
- // If the schedulable insertion point is used in multiple entries - just
- // exit, no known ordering at this point, available only after real
- // scheduling.
- if (!doesNotNeedToBeScheduled(InsertPt) &&
- (TEUseEI.UserTE != UseEI.UserTE || TEUseEI.EdgeIdx < UseEI.EdgeIdx))
- continue;
// If the users are the PHI nodes with the same incoming blocks - skip.
if (TEUseEI.UserTE->State == TreeEntry::Vectorize &&
TEUseEI.UserTE->getOpcode() == Instruction::PHI &&
@@ -15426,29 +15382,19 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// Set the insert point to the beginning of the basic block if the entry
// should not be scheduled.
- auto FindScheduleBundle = [&](const TreeEntry *E) -> const ScheduleBundle * {
+ const auto *It = BlocksSchedules.find(BB);
+ auto IsNotScheduledEntry = [&](const TreeEntry *E) {
if (E->isGather())
- return nullptr;
+ return false;
// Found previously that the instruction do not need to be scheduled.
- const auto *It = BlocksSchedules.find(BB);
- if (It == BlocksSchedules.end())
- return nullptr;
- for (Value *V : E->Scalars) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I || isa<PHINode>(I) || doesNotNeedToBeScheduled(I))
- continue;
- ArrayRef<ScheduleBundle *> Bundles = It->second->getScheduleBundles(I);
- if (Bundles.empty())
- continue;
- const auto *It = find_if(
- Bundles, [&](ScheduleBundle *B) { return B->getTreeEntry() == E; });
- if (It != Bundles.end())
- return *It;
- }
- return nullptr;
+ return It == BlocksSchedules.end() || all_of(E->Scalars, [&](Value *V) {
+ if (!isa<Instruction>(V))
+ return true;
+ return It->second->getScheduleBundles(V).empty();
+ });
};
- const ScheduleBundle *Bundle = FindScheduleBundle(E);
- if (!E->isGather() && !Bundle) {
+ if (IsNotScheduledEntry(E) ||
+ (!E->isGather() && all_of(E->Scalars, isVectorLikeInstWithConstOps))) {
if ((E->getOpcode() == Instruction::GetElementPtr &&
any_of(E->Scalars,
[](Value *V) {
@@ -15474,10 +15420,19 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// scheduled, and the last instruction is VL.back(). So we start with
// VL.back() and iterate over schedule data until we reach the end of the
// bundle. The end of the bundle is marked by null ScheduleData.
- if (Bundle) {
- assert(!E->isGather() && "Gathered instructions should not be scheduled");
- Res = Bundle->getBundle().back()->getInst();
- return *Res;
+ if (It != BlocksSchedules.end() && !E->isGather()) {
+ Value *V = E->isOneOf(E->Scalars.back());
+ if (doesNotNeedToBeScheduled(V))
+ V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled);
+ if (ArrayRef<ScheduleBundle *> Bundles = It->second->getScheduleBundles(V);
+ !Bundles.empty()) {
+ const auto *It = find_if(
+ Bundles, [&](ScheduleBundle *B) { return B->getTreeEntry() == E; });
+ assert(It != Bundles.end() && "Failed to find bundle");
+ Res = (*It)->getBundle().back()->getInst();
+ return *Res;
+ }
+ assert(E->getOpcode() == Instruction::PHI && "Expected PHI");
}
// LastInst can still be null at this point if there's either not an entry
@@ -16230,10 +16185,10 @@ BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
const InstructionsState &S) {
if (!S)
return nullptr;
- for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
- if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
- TE->isSame(VL))
- return TE;
+ if (TreeEntry *VE = getSameValuesTreeEntry(S.getMainOp(), VL);
+ VE && VE->UserTreeIndex.UserTE == E &&
+ VE->UserTreeIndex.EdgeIdx == NodeIdx)
+ return VE;
return nullptr;
}
@@ -17926,13 +17881,13 @@ Value *BoUpSLP::vectorizeTree(
const ExtraValueToDebugLocsMap &ExternallyUsedValues,
Instruction *ReductionRoot,
ArrayRef<std::tuple<Value *, unsigned, bool>> VectorValuesAndScales) {
- // Clean Entry-to-LastInstruction table. It can be affected after scheduling,
- // need to rebuild it.
- EntryToLastInstruction.clear();
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
}
+ // Clean Entry-to-LastInstruction table. It can be affected after scheduling,
+ // need to rebuild it.
+ EntryToLastInstruction.clear();
if (ReductionRoot)
Builder.SetInsertPoint(ReductionRoot->getParent(),
@@ -18774,10 +18729,18 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
// dependencies. As soon as the bundle is "ready" it means that there are no
// cyclic dependencies and we can schedule it. Note that's important that we
// don't "schedule" the bundle yet.
+ SmallPtrSet<const ScheduleBundle *, 16> Visited;
while (((!Bundle && ReSchedule) || (Bundle && !Bundle.isReady())) &&
!ReadyInsts.empty()) {
ScheduleEntity *Picked = ReadyInsts.pop_back_val();
- assert(Picked->isReady() && "must be ready to schedule");
+ const auto *PickedBundle = dyn_cast<ScheduleBundle>(Picked);
+ if (PickedBundle && !Visited.insert(PickedBundle).second) {
+ assert(PickedBundle->isScheduled() && "bundle must be scheduled");
+ continue;
+ }
+ assert((PickedBundle ? PickedBundle->isReady()
+ : cast<ScheduleData>(Picked)->isReady()) &&
+ "must be ready to schedule");
schedule(Picked, ReadyInsts);
if (Picked == &Bundle)
break;
@@ -18831,16 +18794,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle.isReady()) {
for (ScheduleData *BD : Bundle.getBundle()) {
- if (BD->isReady()) {
- ArrayRef<ScheduleBundle *> Bundles = getScheduleBundles(BD->getInst());
- if (Bundles.empty()) {
- ReadyInsts.insert(BD);
- continue;
- }
- for (ScheduleBundle *B : Bundles)
- if (B->isReady())
- ReadyInsts.insert(B);
- }
+ if (BD->isReady())
+ ReadyInsts.insert(BD);
}
ScheduledBundlesList.pop_back();
for (Value *V : VL) {
@@ -19171,11 +19126,6 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
SD->setScheduled(/*Scheduled=*/false);
SD->resetUnscheduledDeps();
}
- for (ScheduleBundle *Bundle : getScheduleBundles(I)) {
- assert(isInSchedulingRegion(*Bundle) &&
- "ScheduleBundle not in scheduling region");
- Bundle->setScheduled(/*Scheduled=*/false);
- }
}
ReadyInsts.clear();
}
@@ -19234,7 +19184,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
Instruction *LastScheduledInst = BS->ScheduleEnd;
// Do the "real" scheduling.
- SmallPtrSet<Instruction *, 16> Scheduled;
while (!ReadyInsts.empty()) {
auto *Picked = *ReadyInsts.begin();
ReadyInsts.erase(ReadyInsts.begin());
@@ -19244,14 +19193,10 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
if (auto *Bundle = dyn_cast<ScheduleBundle>(Picked)) {
for (const ScheduleData *BundleMember : Bundle->getBundle()) {
Instruction *PickedInst = BundleMember->getInst();
- if (!Scheduled.insert(PickedInst).second)
- continue;
if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst)
PickedInst->moveAfter(LastScheduledInst->getPrevNode());
LastScheduledInst = PickedInst;
}
- EntryToLastInstruction.try_emplace(Bundle->getTreeEntry(),
- LastScheduledInst);
} else {
auto *SD = cast<ScheduleData>(Picked);
Instruction *PickedInst = SD->getInst();
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index fcd3bfc3f323a..3cab4a4da3f8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -39,26 +39,28 @@ define void @test() {
; CHECK: [[BB77]]:
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP16]], float [[I67]], i32 6
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I69]], i32 7
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[I68]], i32 0
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[I66]], i32 1
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
-; C...
[truncated]
|
@llvm/pr-subscribers-vectorizers Author: Alexander Kornienko (alexfh) Changes: Reverts llvm/llvm-project#133091 due to crashes. See #133091 (comment) for the report. Patch is 35.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134604.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e2031df810573..b58cf19d9b28a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3783,6 +3783,11 @@ class BoUpSLP {
if (isa<PoisonValue>(V))
continue;
auto It = ScalarToTreeEntries.find(V);
+ assert(
+ (It == ScalarToTreeEntries.end() ||
+ (It->getSecond().size() == 1 && It->getSecond().front() == Last) ||
+ doesNotNeedToBeScheduled(V)) &&
+ "Scalar already in tree!");
if (It == ScalarToTreeEntries.end()) {
ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
(void)Processed.insert(V);
@@ -4042,9 +4047,6 @@ class BoUpSLP {
private:
/// Used for getting a "good" final ordering of instructions.
int SchedulingPriority = 0;
- /// True if this instruction (or bundle) is scheduled (or considered as
- /// scheduled in the dry-run).
- bool IsScheduled = false;
/// The kind of the ScheduleEntity.
const Kind K = Kind::ScheduleData;
@@ -4058,10 +4060,6 @@ class BoUpSLP {
return SD->isReady();
return cast<ScheduleBundle>(this)->isReady();
}
- /// Gets/sets if the bundle is scheduled.
- bool isScheduled() const { return IsScheduled; }
- void setScheduled(bool Scheduled) { IsScheduled = Scheduled; }
-
static bool classof(const ScheduleEntity *) { return true; }
};
@@ -4134,6 +4132,10 @@ class BoUpSLP {
IsScheduled = false;
}
+ /// Gets/sets if the bundle is scheduled.
+ bool isScheduled() const { return IsScheduled; }
+ void setScheduled(bool Scheduled) { IsScheduled = Scheduled; }
+
/// Gets the number of unscheduled dependencies.
int getUnscheduledDeps() const { return UnscheduledDeps; }
/// Gets the number of dependencies.
@@ -4208,6 +4210,10 @@ class BoUpSLP {
/// for scheduling.
/// Note that this is negative as long as Dependencies is not calculated.
int UnscheduledDeps = InvalidDeps;
+
+ /// True if this instruction is scheduled (or considered as scheduled in the
+ /// dry-run).
+ bool IsScheduled = false;
};
#ifndef NDEBUG
@@ -4252,6 +4258,11 @@ class BoUpSLP {
}
}
+ bool isScheduled() const {
+ return all_of(Bundle,
+ [](const ScheduleData *SD) { return SD->isScheduled(); });
+ }
+
/// Returns the number of unscheduled dependencies in the bundle.
int unscheduledDepsInBundle() const {
assert(*this && "bundle must not be empty");
@@ -4508,22 +4519,12 @@ class BoUpSLP {
ProcessBundleMember(SD, nullptr);
} else {
ScheduleBundle &Bundle = *cast<ScheduleBundle>(Data);
- Bundle.setScheduled(/*Scheduled=*/true);
+ for_each(Bundle.getBundle(), [](ScheduleData *SD) {
+ SD->setScheduled(/*Scheduled=*/true);
+ });
LLVM_DEBUG(dbgs() << "SLP: schedule " << Bundle << "\n");
- auto AreAllBundlesScheduled = [&](const ScheduleData *SD) {
- ArrayRef<ScheduleBundle *> SDBundles =
- getScheduleBundles(SD->getInst());
- return !SDBundles.empty() &&
- all_of(SDBundles, [&](const ScheduleBundle *SDBundle) {
- return SDBundle->isScheduled();
- });
- };
- for (ScheduleData *SD : Bundle.getBundle()) {
- if (AreAllBundlesScheduled(SD)) {
- SD->setScheduled(/*Scheduled=*/true);
- ProcessBundleMember(SD, &Bundle);
- }
- }
+ for (ScheduleData *SD : Bundle.getBundle())
+ ProcessBundleMember(SD, &Bundle);
}
}
@@ -4554,11 +4555,10 @@ class BoUpSLP {
SD->verify();
}
- assert(all_of(ReadyInsts,
- [](const ScheduleEntity *Bundle) {
- return Bundle->isReady();
- }) &&
- "item in ready list not ready?");
+ for (const ScheduleEntity *Bundle : ReadyInsts) {
+ assert(Bundle->isReady() && "item in ready list not ready?");
+ (void)Bundle;
+ }
}
/// Put all instructions into the ReadyList which are ready for scheduling.
@@ -7440,7 +7440,7 @@ void BoUpSLP::buildExternalUses(
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in FoundLane will
// be used.
- if (all_of(UseEntries, [&](TreeEntry *UseEntry) {
+ if (any_of(UseEntries, [&](TreeEntry *UseEntry) {
return UseEntry->State == TreeEntry::ScatterVectorize ||
!doesInTreeUserNeedToExtract(
Scalar, getRootEntryInstruction(*UseEntry), TLI,
@@ -9488,47 +9488,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// We now know that this is a vector of instructions of the same type from
// the same block.
- // Check that none of the instructions in the bundle are already in the tree
- // and the node may be not profitable for the vectorization as the small
- // alternate node.
- if (S && S.isAltShuffle()) {
- auto GetNumVectorizedExtracted = [&]() {
- APInt Extracted = APInt::getZero(VL.size());
- APInt Vectorized = APInt::getAllOnes(VL.size());
- for (auto [Idx, V] : enumerate(VL)) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I || doesNotNeedToBeScheduled(I) ||
- all_of(I->operands(), [&](const Use &U) {
- return isa<ExtractElementInst>(U.get());
- }))
- continue;
- if (isVectorized(I))
- Vectorized.clearBit(Idx);
- else if (!I->hasOneUser() && !areAllUsersVectorized(I, UserIgnoreList))
- Extracted.setBit(Idx);
- }
- return std::make_pair(Vectorized, Extracted);
- };
- auto [Vectorized, Extracted] = GetNumVectorizedExtracted();
- constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
- bool PreferScalarize = !Vectorized.isAllOnes() && VL.size() == 2;
- if (!Vectorized.isAllOnes() && !PreferScalarize) {
- // Rough cost estimation, if the vector code (+ potential extracts) is
- // more profitable than the scalar + buildvector.
- Type *ScalarTy = VL.front()->getType();
- auto *VecTy = getWidenedType(ScalarTy, VL.size());
- InstructionCost VectorizeCostEstimate =
- ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy, {}, Kind) +
- ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Extracted,
- /*Insert=*/false, /*Extract=*/true, Kind);
- InstructionCost ScalarizeCostEstimate =
- ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Vectorized,
- /*Insert=*/true, /*Extract=*/false, Kind);
- PreferScalarize = VectorizeCostEstimate > ScalarizeCostEstimate;
- }
- if (PreferScalarize) {
- LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
- "node is not profitable.\n");
+ // Check that none of the instructions in the bundle are already in the tree.
+ for (Value *V : VL) {
+ if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
+ doesNotNeedToBeScheduled(V))
+ continue;
+ if (isVectorized(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
+ << ") is already in tree.\n");
if (TryToFindDuplicates(S)) {
auto Invalid = ScheduleBundle::invalid();
newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
@@ -9617,6 +9584,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
#endif
if (!BundlePtr || (*BundlePtr && !*BundlePtr.value())) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
+ assert((!BS.getScheduleData(VL0) || BS.getScheduleBundles(VL0).empty()) &&
+ "tryScheduleBundle should not create bundle on failure");
// Last chance to try to vectorize alternate node.
if (S.isAltShuffle() && ReuseShuffleIndices.empty() &&
TrySplitNode(SmallNodeSize, S))
@@ -12405,7 +12374,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallBitVector UsedScalars(Sz, false);
for (unsigned I = 0; I < Sz; ++I) {
if (isa<Instruction>(UniqueValues[I]) &&
- getTreeEntries(UniqueValues[I]).front() == E)
+ is_contained(getTreeEntries(UniqueValues[I]), E))
continue;
UsedScalars.set(I);
}
@@ -13975,7 +13944,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
for (ExternalUser &EU : ExternalUses) {
ScalarUserAndIdx.emplace_back(EU.Scalar, EU.User, EU.Lane);
}
- SmallDenseSet<std::pair<Value *, Value *>, 8> CheckedScalarUser;
for (ExternalUser &EU : ExternalUses) {
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
@@ -13983,12 +13951,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
if (EphValues.count(EU.User))
continue;
- // Check if the scalar for the given user or all users is accounted already.
- if (!CheckedScalarUser.insert(std::make_pair(EU.Scalar, EU.User)).second ||
- (EU.User &&
- CheckedScalarUser.contains(std::make_pair(EU.Scalar, nullptr))))
- continue;
-
// Used in unreachable blocks or in EH pads (rarely executed) or is
// terminated with unreachable instruction.
if (BasicBlock *UserParent =
@@ -14691,16 +14653,10 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
PHINode *UserPHI = UseEI.UserTE->State != TreeEntry::SplitVectorize
? dyn_cast<PHINode>(UseEI.UserTE->getMainOp())
: nullptr;
- Instruction *InsertPt =
+ const Instruction *InsertPt =
UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
: &getLastInstructionInBundle(UseEI.UserTE);
if (TEInsertPt == InsertPt) {
- // If the schedulable insertion point is used in multiple entries - just
- // exit, no known ordering at this point, available only after real
- // scheduling.
- if (!doesNotNeedToBeScheduled(InsertPt) &&
- (TEUseEI.UserTE != UseEI.UserTE || TEUseEI.EdgeIdx < UseEI.EdgeIdx))
- continue;
// If the users are the PHI nodes with the same incoming blocks - skip.
if (TEUseEI.UserTE->State == TreeEntry::Vectorize &&
TEUseEI.UserTE->getOpcode() == Instruction::PHI &&
@@ -15426,29 +15382,19 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// Set the insert point to the beginning of the basic block if the entry
// should not be scheduled.
- auto FindScheduleBundle = [&](const TreeEntry *E) -> const ScheduleBundle * {
+ const auto *It = BlocksSchedules.find(BB);
+ auto IsNotScheduledEntry = [&](const TreeEntry *E) {
if (E->isGather())
- return nullptr;
+ return false;
// Found previously that the instruction do not need to be scheduled.
- const auto *It = BlocksSchedules.find(BB);
- if (It == BlocksSchedules.end())
- return nullptr;
- for (Value *V : E->Scalars) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I || isa<PHINode>(I) || doesNotNeedToBeScheduled(I))
- continue;
- ArrayRef<ScheduleBundle *> Bundles = It->second->getScheduleBundles(I);
- if (Bundles.empty())
- continue;
- const auto *It = find_if(
- Bundles, [&](ScheduleBundle *B) { return B->getTreeEntry() == E; });
- if (It != Bundles.end())
- return *It;
- }
- return nullptr;
+ return It == BlocksSchedules.end() || all_of(E->Scalars, [&](Value *V) {
+ if (!isa<Instruction>(V))
+ return true;
+ return It->second->getScheduleBundles(V).empty();
+ });
};
- const ScheduleBundle *Bundle = FindScheduleBundle(E);
- if (!E->isGather() && !Bundle) {
+ if (IsNotScheduledEntry(E) ||
+ (!E->isGather() && all_of(E->Scalars, isVectorLikeInstWithConstOps))) {
if ((E->getOpcode() == Instruction::GetElementPtr &&
any_of(E->Scalars,
[](Value *V) {
@@ -15474,10 +15420,19 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// scheduled, and the last instruction is VL.back(). So we start with
// VL.back() and iterate over schedule data until we reach the end of the
// bundle. The end of the bundle is marked by null ScheduleData.
- if (Bundle) {
- assert(!E->isGather() && "Gathered instructions should not be scheduled");
- Res = Bundle->getBundle().back()->getInst();
- return *Res;
+ if (It != BlocksSchedules.end() && !E->isGather()) {
+ Value *V = E->isOneOf(E->Scalars.back());
+ if (doesNotNeedToBeScheduled(V))
+ V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled);
+ if (ArrayRef<ScheduleBundle *> Bundles = It->second->getScheduleBundles(V);
+ !Bundles.empty()) {
+ const auto *It = find_if(
+ Bundles, [&](ScheduleBundle *B) { return B->getTreeEntry() == E; });
+ assert(It != Bundles.end() && "Failed to find bundle");
+ Res = (*It)->getBundle().back()->getInst();
+ return *Res;
+ }
+ assert(E->getOpcode() == Instruction::PHI && "Expected PHI");
}
// LastInst can still be null at this point if there's either not an entry
@@ -16230,10 +16185,10 @@ BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
const InstructionsState &S) {
if (!S)
return nullptr;
- for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
- if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
- TE->isSame(VL))
- return TE;
+ if (TreeEntry *VE = getSameValuesTreeEntry(S.getMainOp(), VL);
+ VE && VE->UserTreeIndex.UserTE == E &&
+ VE->UserTreeIndex.EdgeIdx == NodeIdx)
+ return VE;
return nullptr;
}
@@ -17926,13 +17881,13 @@ Value *BoUpSLP::vectorizeTree(
const ExtraValueToDebugLocsMap &ExternallyUsedValues,
Instruction *ReductionRoot,
ArrayRef<std::tuple<Value *, unsigned, bool>> VectorValuesAndScales) {
- // Clean Entry-to-LastInstruction table. It can be affected after scheduling,
- // need to rebuild it.
- EntryToLastInstruction.clear();
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
}
+ // Clean Entry-to-LastInstruction table. It can be affected after scheduling,
+ // need to rebuild it.
+ EntryToLastInstruction.clear();
if (ReductionRoot)
Builder.SetInsertPoint(ReductionRoot->getParent(),
@@ -18774,10 +18729,18 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
// dependencies. As soon as the bundle is "ready" it means that there are no
// cyclic dependencies and we can schedule it. Note that's important that we
// don't "schedule" the bundle yet.
+ SmallPtrSet<const ScheduleBundle *, 16> Visited;
while (((!Bundle && ReSchedule) || (Bundle && !Bundle.isReady())) &&
!ReadyInsts.empty()) {
ScheduleEntity *Picked = ReadyInsts.pop_back_val();
- assert(Picked->isReady() && "must be ready to schedule");
+ const auto *PickedBundle = dyn_cast<ScheduleBundle>(Picked);
+ if (PickedBundle && !Visited.insert(PickedBundle).second) {
+ assert(PickedBundle->isScheduled() && "bundle must be scheduled");
+ continue;
+ }
+ assert((PickedBundle ? PickedBundle->isReady()
+ : cast<ScheduleData>(Picked)->isReady()) &&
+ "must be ready to schedule");
schedule(Picked, ReadyInsts);
if (Picked == &Bundle)
break;
@@ -18831,16 +18794,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle.isReady()) {
for (ScheduleData *BD : Bundle.getBundle()) {
- if (BD->isReady()) {
- ArrayRef<ScheduleBundle *> Bundles = getScheduleBundles(BD->getInst());
- if (Bundles.empty()) {
- ReadyInsts.insert(BD);
- continue;
- }
- for (ScheduleBundle *B : Bundles)
- if (B->isReady())
- ReadyInsts.insert(B);
- }
+ if (BD->isReady())
+ ReadyInsts.insert(BD);
}
ScheduledBundlesList.pop_back();
for (Value *V : VL) {
@@ -19171,11 +19126,6 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
SD->setScheduled(/*Scheduled=*/false);
SD->resetUnscheduledDeps();
}
- for (ScheduleBundle *Bundle : getScheduleBundles(I)) {
- assert(isInSchedulingRegion(*Bundle) &&
- "ScheduleBundle not in scheduling region");
- Bundle->setScheduled(/*Scheduled=*/false);
- }
}
ReadyInsts.clear();
}
@@ -19234,7 +19184,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
Instruction *LastScheduledInst = BS->ScheduleEnd;
// Do the "real" scheduling.
- SmallPtrSet<Instruction *, 16> Scheduled;
while (!ReadyInsts.empty()) {
auto *Picked = *ReadyInsts.begin();
ReadyInsts.erase(ReadyInsts.begin());
@@ -19244,14 +19193,10 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
if (auto *Bundle = dyn_cast<ScheduleBundle>(Picked)) {
for (const ScheduleData *BundleMember : Bundle->getBundle()) {
Instruction *PickedInst = BundleMember->getInst();
- if (!Scheduled.insert(PickedInst).second)
- continue;
if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst)
PickedInst->moveAfter(LastScheduledInst->getPrevNode());
LastScheduledInst = PickedInst;
}
- EntryToLastInstruction.try_emplace(Bundle->getTreeEntry(),
- LastScheduledInst);
} else {
auto *SD = cast<ScheduleData>(Picked);
Instruction *PickedInst = SD->getInst();
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index fcd3bfc3f323a..3cab4a4da3f8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -39,26 +39,28 @@ define void @test() {
; CHECK: [[BB77]]:
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP16]], float [[I67]], i32 6
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I69]], i32 7
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[I68]], i32 0
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[I66]], i32 1
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
-; C...
[truncated]
|
This breaks Transforms/SLPVectorizer/X86/reordered-masked-loads.ll. Apparently, the test relies on some changes in #133091. |
Ah, the test is quite fresh - added in 19aec00. @alexey-bataev can you take care of updating it after the revert or should I disable it with XFAIL or otherwise? |
Will you wait for the fix then? Will try to land it soon |
We have a number of failures related to SLP vectorizer - I'll be trying to reduce and root-cause them next. As usual, it would be better to revert to green and reland with the necessary fixes. |
Agree, but reverting may cause other unexpected side effects at this point; there are already some patches landed on top of it :( |
Indeed, I see a commit-revert-reland chain of #132099 and a few other related commits in the last few days. Reverting all of that is probably even messier than landing a fix forward here =( |
Another crash:
Test:
|
It looks like this is also fixed by f413772 |
Reverts #133091 due to crashes
See #133091 (comment) for the report.