Skip to content

[DFAJumpThreading] Avoid exploring the paths that never come back #85505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class SelectInstToUnfold {
explicit operator bool() const { return SI && SIUse; }
};

void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
std::vector<BasicBlock *> *NewBBs);

Expand All @@ -142,6 +142,7 @@ class DFAJumpThreading {
: AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}

bool run(Function &F);
bool LoopInfoBroken;

private:
void
Expand All @@ -157,7 +158,7 @@ class DFAJumpThreading {

std::vector<SelectInstToUnfold> NewSIsToUnfold;
std::vector<BasicBlock *> NewBBs;
unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs);
unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);

// Put newly discovered select instructions into the work list.
for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
Expand Down Expand Up @@ -201,7 +202,7 @@ void createBasicBlockAndSinkSelectInst(
/// created basic blocks into \p NewBBs.
///
/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
std::vector<BasicBlock *> *NewBBs) {
SelectInst *SI = SIToUnfold.getInst();
Expand Down Expand Up @@ -307,6 +308,12 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},
{DominatorTree::Insert, StartBlock, FT}});

// Preserve loop info
if (Loop *L = LI->getLoopFor(SI->getParent())) {
for (BasicBlock *NewBB : *NewBBs)
L->addBasicBlockToLoop(NewBB, *LI);
}

// The select is now dead.
assert(SI->use_empty() && "Select must be dead now");
SI->eraseFromParent();
Expand Down Expand Up @@ -522,9 +529,10 @@ struct MainSwitch {
};

struct AllSwitchPaths {
AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE)
: Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()),
ORE(ORE) {}
AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
LoopInfo *LI)
: Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
LI(LI) {}

std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
unsigned getNumThreadingPaths() { return TPaths.size(); }
Expand Down Expand Up @@ -596,6 +604,12 @@ struct AllSwitchPaths {

Visited.insert(BB);

// Stop if we have reached the BB out of loop, since its successors have no
// impact on the DFA.
// TODO: Do we need to stop exploring if BB is the outer loop of the switch?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so. Would that cause any test changes?

Copy link
Member Author

@XChy XChy Mar 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it would break test2 in dfa-jump-threading-transform.ll. This test contains nested loops. If we don't see the outer loop, we can only thread part of switch.

if (!LI->getLoopFor(BB))
return Res;

// Some blocks have multiple edges to the same successor, and this set
// is used to prevent a duplicate path from being generated
SmallSet<BasicBlock *, 4> Successors;
Expand Down Expand Up @@ -737,6 +751,7 @@ struct AllSwitchPaths {
BasicBlock *SwitchBlock;
OptimizationRemarkEmitter *ORE;
std::vector<ThreadingPath> TPaths;
LoopInfo *LI;
};

struct TransformDFA {
Expand Down Expand Up @@ -1283,6 +1298,7 @@ bool DFAJumpThreading::run(Function &F) {

SmallVector<AllSwitchPaths, 2> ThreadableLoops;
bool MadeChanges = false;
LoopInfoBroken = false;

for (BasicBlock &BB : F) {
auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
Expand All @@ -1304,7 +1320,7 @@ bool DFAJumpThreading::run(Function &F) {
if (!Switch.getSelectInsts().empty())
MadeChanges = true;

AllSwitchPaths SwitchPaths(&Switch, ORE);
AllSwitchPaths SwitchPaths(&Switch, ORE, LI);
SwitchPaths.run();

if (SwitchPaths.getNumThreadingPaths() > 0) {
Expand All @@ -1315,10 +1331,15 @@ bool DFAJumpThreading::run(Function &F) {
// strict requirement but it can cause buggy behavior if there is an
// overlap of blocks in different opportunities. There is a lot of room to
// experiment with catching more opportunities here.
// NOTE: To release this contraint, we must handle LoopInfo invalidation
break;
}
}

#ifdef NDEBUG
LI->verify(*DT);
#endif

SmallPtrSet<const Value *, 32> EphValues;
if (ThreadableLoops.size() > 0)
CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
Expand All @@ -1327,6 +1348,7 @@ bool DFAJumpThreading::run(Function &F) {
TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
Transform.run();
MadeChanges = true;
LoopInfoBroken = true;
}

#ifdef EXPENSIVE_CHECKS
Expand All @@ -1347,11 +1369,13 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
OptimizationRemarkEmitter ORE(&F);

if (!DFAJumpThreading(&AC, &DT, &LI, &TTI, &ORE).run(F))
DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
if (!ThreadImpl.run(F))
return PreservedAnalyses::all();

PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
if (!ThreadImpl.LoopInfoBroken)
PA.preserve<LoopAnalysis>();
return PA;
}