Skip to content

Commit 50b2a11

Browse files
Move "auto-init" instructions to the dominator of their users
As a result of -ftrivial-auto-var-init, clang generates instructions to set alloca'd memory to a given pattern, right after the allocation site. This (somewhat costly) operation can sometimes be delayed, so that it is only executed conditionally. This is not an uncommon situation: it happens ~500 times on the CPython code base, and much more often on the LLVM code base. The benefit varies greatly depending on the execution path, but it should not regress performance. This is a recommit of cca0100 with MemorySSA update fixes. Differential Revision: https://reviews.llvm.org/D137707
1 parent a198ade commit 50b2a11

19 files changed

+613
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===- MoveAutoInit.h - Move insts marked as auto-init Pass --*- C++ -*-======//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass moves instructions marked as auto-init closer to their use if
10+
// profitable, generally because it moves them under a guard, potentially
11+
// skipping the overhead of the auto-init under some execution paths.
12+
//
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
#ifndef LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
17+
#define LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
18+
19+
#include "llvm/IR/PassManager.h"
20+
21+
namespace llvm {
22+
23+
/// Function pass that moves instructions marked as auto-init closer to their
/// users, so that the initialization may end up under a guard and be skipped
/// on some execution paths.
class MoveAutoInitPass : public PassInfoMixin<MoveAutoInitPass> {
24+
public:
25+
/// Runs the pass on \p F, obtaining the analyses it needs from \p AM, and
/// returns the set of analyses that are still valid afterwards.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
26+
};
27+
} // end namespace llvm
28+
29+
#endif // LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@
247247
#include "llvm/Transforms/Utils/LowerSwitch.h"
248248
#include "llvm/Transforms/Utils/Mem2Reg.h"
249249
#include "llvm/Transforms/Utils/MetaRenamer.h"
250+
#include "llvm/Transforms/Utils/MoveAutoInit.h"
250251
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
251252
#include "llvm/Transforms/Utils/PredicateInfo.h"
252253
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
126126
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
127127
#include "llvm/Transforms/Utils/Mem2Reg.h"
128+
#include "llvm/Transforms/Utils/MoveAutoInit.h"
128129
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
129130
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
130131
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
@@ -656,6 +657,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
656657
FPM.addPass(MemCpyOptPass());
657658

658659
FPM.addPass(DSEPass());
660+
FPM.addPass(MoveAutoInitPass());
661+
659662
FPM.addPass(createFunctionToLoopPassAdaptor(
660663
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
661664
/*AllowSpeculation=*/true),
@@ -1802,6 +1805,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
18021805

18031806
// Nuke dead stores.
18041807
MainFPM.addPass(DSEPass());
1808+
MainFPM.addPass(MoveAutoInitPass());
18051809
MainFPM.addPass(MergedLoadStoreMotionPass());
18061810

18071811
LoopPassManager LPM;

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
333333
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
334334
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
335335
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
336+
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
336337
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
337338
FUNCTION_PASS("newgvn", NewGVNPass())
338339
FUNCTION_PASS("jump-threading", JumpThreadingPass())

llvm/lib/Transforms/Utils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ add_llvm_component_library(LLVMTransformUtils
5858
MetaRenamer.cpp
5959
MisExpect.cpp
6060
ModuleUtils.cpp
61+
MoveAutoInit.cpp
6162
NameAnonGlobals.cpp
6263
PredicateInfo.cpp
6364
PromoteMemoryToRegister.cpp
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass moves instructions marked as auto-init closer to the basic blocks
10+
// that use them, possibly taking them off some control paths of the function.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "llvm/Transforms/Utils/MoveAutoInit.h"
15+
#include "llvm/ADT/STLExtras.h"
16+
#include "llvm/ADT/Statistic.h"
17+
#include "llvm/ADT/StringSet.h"
18+
#include "llvm/Analysis/MemorySSA.h"
19+
#include "llvm/Analysis/MemorySSAUpdater.h"
20+
#include "llvm/IR/DebugInfo.h"
21+
#include "llvm/IR/Dominators.h"
22+
#include "llvm/IR/IRBuilder.h"
23+
#include "llvm/IR/Instructions.h"
24+
#include "llvm/IR/IntrinsicInst.h"
25+
#include "llvm/Support/CommandLine.h"
26+
#include "llvm/Transforms/Utils.h"
27+
#include "llvm/Transforms/Utils/LoopUtils.h"
28+
29+
using namespace llvm;
30+
31+
#define DEBUG_TYPE "move-auto-init"
32+
33+
STATISTIC(NumMoved, "Number of instructions moved");
34+
35+
// Hidden command-line knob (default 128) bounding the search performed for
// each auto-init instruction: once more MemorySSA accesses than this have been
// visited while looking for its users, the search gives up and the instruction
// is left where it is.
static cl::opt<unsigned> MoveAutoInitThreshold(
36+
"move-auto-init-threshold", cl::Hidden, cl::init(128),
37+
cl::desc("Maximum instructions to analyze per moved initialization"));
38+
39+
static bool hasAutoInitMetadata(const Instruction &I) {
40+
return I.hasMetadata(LLVMContext::MD_annotation) &&
41+
any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(),
42+
[](const MDOperand &Op) {
43+
return cast<MDString>(Op.get())->getString() == "auto-init";
44+
});
45+
}
46+
47+
/// Finds a BasicBlock in the CFG where instruction `I` can be moved to while
48+
/// not changing the Memory SSA ordering and being guarded by at least one
49+
/// condition.
50+
static BasicBlock *usersDominator(Instruction *I, DominatorTree &DT,
51+
MemorySSA &MSSA) {
52+
BasicBlock *CurrentDominator = nullptr;
53+
MemoryLocation ML;
54+
if (auto *MI = dyn_cast<MemIntrinsic>(I))
55+
ML = MemoryLocation::getForDest(MI);
56+
else if (auto *SI = dyn_cast<StoreInst>(I))
57+
ML = MemoryLocation::get(SI);
58+
else
59+
assert(false && "memory location set");
60+
61+
MemoryUseOrDef &IMA = *MSSA.getMemoryAccess(I);
62+
BatchAAResults AA(MSSA.getAA());
63+
64+
SmallPtrSet<MemoryAccess *, 8> Visited;
65+
66+
auto AsMemoryAccess = [](User *U) { return cast<MemoryAccess>(U); };
67+
SmallVector<MemoryAccess *> WorkList(map_range(IMA.users(), AsMemoryAccess));
68+
69+
while (!WorkList.empty()) {
70+
MemoryAccess *MA = WorkList.pop_back_val();
71+
if (!Visited.insert(MA).second)
72+
continue;
73+
74+
if (Visited.size() > MoveAutoInitThreshold)
75+
return nullptr;
76+
77+
bool FoundClobberingUser = false;
78+
if (auto *M = dyn_cast<MemoryUseOrDef>(MA)) {
79+
Instruction *MI = M->getMemoryInst();
80+
81+
// If this memory instruction may not clobber `I`, we can skip it.
82+
// LifetimeEnd is a valid user, but we do not want it in the user
83+
// dominator.
84+
if (AA.getModRefInfo(MI, ML) != ModRefInfo::NoModRef &&
85+
!MI->isLifetimeStartOrEnd() && MI != I) {
86+
FoundClobberingUser = true;
87+
CurrentDominator = CurrentDominator
88+
? DT.findNearestCommonDominator(CurrentDominator,
89+
MI->getParent())
90+
: MI->getParent();
91+
}
92+
}
93+
if (!FoundClobberingUser) {
94+
auto UsersAsMemoryAccesses = map_range(MA->users(), AsMemoryAccess);
95+
append_range(WorkList, UsersAsMemoryAccesses);
96+
}
97+
}
98+
return CurrentDominator;
99+
}
100+
101+
/// Moves auto-init instructions of `F`'s entry block down to a block that
/// dominates all of their users, when such a block exists and differs from the
/// entry block.
///
/// For each candidate the destination is adjusted so that the instruction is
/// not moved into a cycle (which could execute it several times) and is not
/// placed in a catchswitch block (which cannot hold other instructions).
/// MemorySSA is updated for every moved instruction.
///
/// \returns true if at least one instruction was moved.
static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) {
  BasicBlock &EntryBB = F.getEntryBlock();
  SmallVector<std::pair<Instruction *, BasicBlock *>> JobList;

  //
  // Compute movable instructions.
  //
  for (Instruction &I : EntryBB) {
    if (!hasAutoInitMetadata(I))
      continue;

    // Never reorder volatile accesses. This is checked at run time rather than
    // asserted so that release builds are protected as well.
    if (I.isVolatile())
      continue;

    BasicBlock *UsersDominator = usersDominator(&I, DT, MSSA);
    if (!UsersDominator)
      continue;

    if (UsersDominator == &EntryBB)
      continue;

    // Traverse the CFG to detect cycles `UsersDominator` would be part of.
    SmallPtrSet<BasicBlock *, 8> TransitiveSuccessors;
    SmallVector<BasicBlock *> WorkList(successors(UsersDominator));
    bool HasCycle = false;
    while (!WorkList.empty()) {
      BasicBlock *CurrBB = WorkList.pop_back_val();
      if (CurrBB == UsersDominator)
        // No early exit because we want to compute the full set of transitive
        // successors.
        HasCycle = true;
      for (BasicBlock *Successor : successors(CurrBB)) {
        if (!TransitiveSuccessors.insert(Successor).second)
          continue;
        WorkList.push_back(Successor);
      }
    }

    // Don't insert if that could create multiple execution of I,
    // but we can insert it in the non back-edge predecessors, if it exists.
    if (HasCycle) {
      // Climb the chain of unique predecessors to the first block that has
      // several incoming edges (or none).
      BasicBlock *UsersDominatorHead = UsersDominator;
      while (BasicBlock *UniquePredecessor =
                 UsersDominatorHead->getUniquePredecessor())
        UsersDominatorHead = UniquePredecessor;

      if (UsersDominatorHead == &EntryBB)
        continue;

      BasicBlock *DominatingPredecessor = nullptr;
      for (BasicBlock *Pred : predecessors(UsersDominatorHead)) {
        // If one of the predecessor of the dominator also transitively is a
        // successor, moving to the dominator would do the inverse of loop
        // hoisting, and we don't want that.
        if (TransitiveSuccessors.count(Pred))
          continue;

        // Blocks unreachable from the entry are not part of the dominator
        // tree, so they cannot take part in a common-dominator query.
        if (!DT.isReachableFromEntry(Pred))
          continue;

        DominatingPredecessor =
            DominatingPredecessor
                ? DT.findNearestCommonDominator(DominatingPredecessor, Pred)
                : Pred;
      }

      if (!DominatingPredecessor || DominatingPredecessor == &EntryBB)
        continue;

      UsersDominator = DominatingPredecessor;
    }

    // CatchSwitchInst blocks can only have one instruction, so they are not
    // good candidates for insertion. The test looks at the first non-PHI
    // instruction: the first insertion point of such a block is its end
    // iterator, which must not be dereferenced.
    while (isa<CatchSwitchInst>(UsersDominator->getFirstNonPHI())) {
      for (BasicBlock *Pred : predecessors(UsersDominator))
        if (DT.isReachableFromEntry(Pred))
          UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred);
    }

    // We finally found a place where I can be moved while not introducing extra
    // execution, and guarded by at least one condition.
    if (UsersDominator != &EntryBB)
      JobList.emplace_back(&I, UsersDominator);
  }

  //
  // Perform the actual substitution.
  //
  if (JobList.empty())
    return false;

  MemorySSAUpdater MSSAU(&MSSA);

  // Reverse insertion to respect relative order between instructions:
  // if two instructions are moved from the same BB to the same BB, we insert
  // the second one in the front, then the first on top of it.
  for (auto &Job : reverse(JobList)) {
    Job.first->moveBefore(&*Job.second->getFirstInsertionPt());
    MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
                      MemorySSA::InsertionPlace::Beginning);
  }

  if (VerifyMemorySSA)
    MSSA.verifyMemorySSA();

  NumMoved += JobList.size();

  return true;
}
206+
207+
/// New pass manager entry point: fetches the dominator tree and MemorySSA for
/// `F`, runs the transformation, and reports which analyses remain valid.
PreservedAnalyses MoveAutoInitPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  DominatorTree &DomTree = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MemSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();

  const bool Changed = runMoveAutoInit(F, DomTree, MemSSA);
  if (Changed) {
    // Instructions were only moved between existing blocks and MemorySSA was
    // kept up to date, so the CFG-level analyses, the dominator tree and
    // MemorySSA are all still valid.
    PreservedAnalyses Preserved;
    Preserved.preserve<DominatorTreeAnalysis>();
    Preserved.preserve<MemorySSAAnalysis>();
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }

  // Nothing moved: every analysis is untouched.
  return PreservedAnalyses::all();
}

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@
206206
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
207207
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
208208
; CHECK-O23SZ-NEXT: Running pass: DSEPass
209+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
209210
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
210211
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
211212
; CHECK-O23SZ-NEXT: Running pass: LICMPass

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@
108108
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
109109
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
110110
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
111+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
111112
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
112113
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
113114
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@
149149
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
150150
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
151151
; CHECK-O23SZ-NEXT: Running pass: DSEPass
152+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
152153
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
153154
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
154155
; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
; CHECK-O-NEXT: Running pass: ADCEPass
137137
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
138138
; CHECK-O23SZ-NEXT: Running pass: DSEPass
139+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
139140
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
140141
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
141142
; CHECK-O23SZ-NEXT: Running pass: LICMPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
; CHECK-O-NEXT: Running pass: ADCEPass
144144
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
145145
; CHECK-O23SZ-NEXT: Running pass: DSEPass
146+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
146147
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
147148
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
148149
; CHECK-O23SZ-NEXT: Running pass: LICMPass

llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@
147147
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
148148
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
149149
; CHECK-O23SZ-NEXT: Running pass: DSEPass
150+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
150151
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
151152
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
152153
; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop

llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@
172172
; CHECK-O-NEXT: Running pass: ADCEPass
173173
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
174174
; CHECK-O23SZ-NEXT: Running pass: DSEPass
175+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
175176
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
176177
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
177178
; CHECK-O23SZ-NEXT: Running pass: LICMPass

llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@
137137
; CHECK-O-NEXT: Running pass: ADCEPass
138138
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
139139
; CHECK-O23SZ-NEXT: Running pass: DSEPass
140+
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
140141
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
141142
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
142143
; CHECK-O23SZ-NEXT: Running pass: LICMPass

0 commit comments

Comments
 (0)