Skip to content

Commit bb2e85f

Browse files
authored
[AMDGPU] Improve StructurizeCFG pass performance: avoid redundant DebugLoc map initialization. NFC. (#130568)
Previously, the TermDL (BB terminator → DebugLoc) map was initialized at the start of processing each function's region, creating entries for the entire function. This could be inefficient for large functions. This patch improves performance by creating map entries only when needed—when a terminator is being killed or when a flow block is created. Additionally, entries are removed immediately after use, preventing unnecessary map growth and ensuring DebugLocs are not "retracked." A mapless variant was also explored, but due to limited familiarity with the structurizer, it was not pursued further. In my cases, this change improves performance by 2-3×.
1 parent 1e83d97 commit bb2e85f

File tree

1 file changed

+25
-15
lines changed

1 file changed

+25
-15
lines changed

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,8 @@ class StructurizeCFG {
361361

362362
void rebuildSSA();
363363

364+
void setDebugLoc(BranchInst *Br, BasicBlock *BB);
365+
364366
public:
365367
void init(Region *R);
366368
bool run(Region *R, DominatorTree *DT);
@@ -595,14 +597,6 @@ void StructurizeCFG::collectInfos() {
595597
// Find the last back edges
596598
analyzeLoops(RN);
597599
}
598-
599-
// Reset the collected term debug locations
600-
TermDL.clear();
601-
602-
for (BasicBlock &BB : *Func) {
603-
if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
604-
TermDL[&BB] = DL;
605-
}
606600
}
607601

608602
/// Insert the missing branch conditions
@@ -924,12 +918,24 @@ void StructurizeCFG::simplifyAffectedPhis() {
924918
} while (Changed);
925919
}
926920

921+
void StructurizeCFG::setDebugLoc(BranchInst *Br, BasicBlock *BB) {
922+
auto I = TermDL.find(BB);
923+
if (I == TermDL.end())
924+
return;
925+
926+
Br->setDebugLoc(I->second);
927+
TermDL.erase(I);
928+
}
929+
927930
/// Remove phi values from all successors and then remove the terminator.
928931
void StructurizeCFG::killTerminator(BasicBlock *BB) {
929932
Instruction *Term = BB->getTerminator();
930933
if (!Term)
931934
return;
932935

936+
if (const DebugLoc &DL = Term->getDebugLoc())
937+
TermDL[BB] = DL;
938+
933939
for (BasicBlock *Succ : successors(BB))
934940
delPhiValues(BB, Succ);
935941

@@ -974,7 +980,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
974980
BasicBlock *BB = Node->getNodeAs<BasicBlock>();
975981
killTerminator(BB);
976982
BranchInst *Br = BranchInst::Create(NewExit, BB);
977-
Br->setDebugLoc(TermDL[BB]);
983+
setDebugLoc(Br, BB);
978984
addPhiValues(BB, NewExit);
979985
if (IncludeDominator)
980986
DT->changeImmediateDominator(NewExit, BB);
@@ -990,10 +996,14 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
990996
Func, Insert);
991997
FlowSet.insert(Flow);
992998

993-
// use a temporary variable to avoid a use-after-free if the map's storage is
994-
// reallocated
995-
DebugLoc DL = TermDL[Dominator];
996-
TermDL[Flow] = std::move(DL);
999+
if (auto *Term = Dominator->getTerminator()) {
1000+
if (const DebugLoc &DL = Term->getDebugLoc())
1001+
TermDL[Flow] = DL;
1002+
} else if (DebugLoc DLTemp = TermDL.lookup(Dominator)) {
1003+
// Use a temporary copy to avoid a use-after-free if the map's storage
1004+
// is reallocated.
1005+
TermDL[Flow] = DLTemp;
1006+
}
9971007

9981008
DT->addNewBlock(Flow, Dominator);
9991009
ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
@@ -1088,7 +1098,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
10881098

10891099
// let it point to entry and next block
10901100
BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1091-
Br->setDebugLoc(TermDL[Flow]);
1101+
setDebugLoc(Br, Flow);
10921102
Conditions.push_back(Br);
10931103
addPhiValues(Flow, Entry);
10941104
DT->changeImmediateDominator(Entry, Flow);
@@ -1129,7 +1139,7 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed,
11291139
LoopEnd = needPrefix(false);
11301140
BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
11311141
BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1132-
Br->setDebugLoc(TermDL[LoopEnd]);
1142+
setDebugLoc(Br, LoopEnd);
11331143
LoopConds.push_back(Br);
11341144
addPhiValues(LoopEnd, LoopStart);
11351145
setPrevNode(Next);

0 commit comments

Comments
 (0)