Skip to content

Commit fbc8457

Browse files
authored
Add profitability check to array-property-opt (#29236)
* Add profitability check to array-property-opt. This change adds additional heuristics to array-property-opt to avoid code-size increases in cases where the optimization may not be beneficial. With this change, we do not specialize a loop if it contains any opaque function calls or if its instruction count exceeds a predefined threshold.
1 parent 82b3420 commit fbc8457

File tree

1 file changed

+83
-2
lines changed

1 file changed

+83
-2
lines changed

lib/SILOptimizer/LoopTransforms/ArrayPropertyOpt.cpp

Lines changed: 83 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,15 +82,64 @@ class ArrayPropertiesAnalysis {
8282
SILBasicBlock *Preheader;
8383
DominanceInfo *DomTree;
8484

85+
llvm::DenseMap<SILFunction *, uint32_t> InstCountCache;
8586
llvm::SmallSet<SILValue, 16> HoistableArray;
8687

8788
SmallPtrSet<SILBasicBlock *, 16> ReachingBlocks;
8889
SmallPtrSet<SILBasicBlock *, 16> CachedExitingBlocks;
90+
91+
// Maximum number of instructions the profitability analysis scans before giving up; checkProfitabilityRecursively also returns this value for a null callee, and run() rejects the loop once a callee's count reaches it.
92+
const uint32_t AnalysisThreshold = 5000;
93+
// Base limit that run() compares LoopInstCount against; a loop whose count exceeds the depth-adjusted limit is not specialized.
94+
const uint32_t LoopInstCountThreshold = 500;
95+
8996
public:
9097
/// Sets up the analysis for loop \p L: Fun is the function that owns the loop header, DomTree is obtained from \p DA for that function, and Preheader starts out null (run() assigns it from the loop).
ArrayPropertiesAnalysis(SILLoop *L, DominanceAnalysis *DA)
9198
: Fun(L->getHeader()->getParent()), Loop(L), Preheader(nullptr),
9299
DomTree(DA->get(Fun)) {}
93100

101+
/// Check if it is profitable to specialize a loop when you see an apply
102+
/// instruction. We consider it is not profitable to specialize the loop when:
103+
/// 1. The callee is not found in the module, or cannot be determined
104+
/// 2. The number of instructions the analysis scans has exceeded the AnalysisThreshold
105+
uint32_t checkProfitabilityRecursively(SILFunction *Callee) {
106+
if (!Callee)
107+
return AnalysisThreshold;
108+
109+
auto CacheEntry = InstCountCache.find(Callee);
110+
if (CacheEntry != InstCountCache.end())
111+
return CacheEntry->second;
112+
113+
InstCountCache.insert(std::make_pair(Callee, 0));
114+
115+
uint32_t InstCount = 0;
116+
117+
for (auto &BB : *Callee) {
118+
for (auto &I : BB) {
119+
if (InstCount++ >= AnalysisThreshold) {
120+
LLVM_DEBUG(llvm::dbgs() << "ArrayPropertyOpt: Disabled Reason - Exceeded Analysis Threshold in "
121+
<< BB.getParent()->getName() << "\n");
122+
InstCountCache[Callee] = AnalysisThreshold;
123+
return AnalysisThreshold;
124+
}
125+
if (auto Apply = FullApplySite::isa(&I)) {
126+
auto Callee = Apply.getReferencedFunctionOrNull();
127+
if (!Callee) {
128+
LLVM_DEBUG(llvm::dbgs() << "ArrayPropertyOpt: Disabled Reason - Found opaque code in "
129+
<< BB.getParent()->getName() << "\n");
130+
LLVM_DEBUG(Apply.dump());
131+
LLVM_DEBUG(I.getOperand(0)->dump());
132+
}
133+
const auto CalleeInstCount = checkProfitabilityRecursively(Callee);
134+
InstCount += CalleeInstCount;
135+
}
136+
}
137+
}
138+
InstCountCache[Callee] = InstCount;
139+
140+
return InstCount;
141+
}
142+
94143
bool run() {
95144
Preheader = Loop->getLoopPreheader();
96145
if (!Preheader) {
@@ -107,10 +156,11 @@ class ArrayPropertiesAnalysis {
107156
// beneficial. This heuristic also simplifies which regions we want to
108157
// specialize on. We will specialize the outermost loopnest that has
109158
// 'array.props' instructions in its preheader.
159+
110160
bool FoundHoistable = false;
161+
uint32_t LoopInstCount = 0;
111162
for (auto *BB : Loop->getBlocks()) {
112163
for (auto &Inst : *BB) {
113-
114164
// Can't clone alloc_stack instructions whose dealloc_stack is outside
115165
// the loop.
116166
if (!Loop->canDuplicate(&Inst))
@@ -122,11 +172,42 @@ class ArrayPropertiesAnalysis {
122172

123173
if (!canHoistArrayPropsInst(ArrayPropsInst))
124174
return false;
175+
176+
LoopInstCount++;
125177
FoundHoistable = true;
126178
}
127179
}
128180

129-
return FoundHoistable;
181+
if (!FoundHoistable)
182+
return false;
183+
184+
// If the LoopInstCount exceeds the threshold, we will disable the optimization on this loop
185+
// For loops of deeper nesting we increase the threshold by an additional 10%
186+
if (LoopInstCount > LoopInstCountThreshold * (1 + (Loop->getLoopDepth() - 1) / 10)) {
187+
LLVM_DEBUG(llvm::dbgs() << "Exceeded LoopInstCountThreshold\n");
188+
return false;
189+
}
190+
191+
// Additionally, we don't specialize the loop if we find opaque code or
192+
// the analysis scans instructions greater than a threshold
193+
// Since only few loops qualify as hoistable, and the profitability check
194+
// can run long in cases of large thresholds, these checks are not folded
195+
// along with the legality checks above.
196+
for (auto *BB : Loop->getBlocks()) {
197+
for (auto &Inst : *BB) {
198+
if (auto Apply = FullApplySite::isa(&Inst)) {
199+
const auto Callee = Apply.getReferencedFunctionOrNull();
200+
auto CalleeInstCount = checkProfitabilityRecursively(Callee);
201+
if (CalleeInstCount >= AnalysisThreshold)
202+
return false;
203+
}
204+
}
205+
}
206+
207+
LLVM_DEBUG(llvm::dbgs() << "Profitable ArrayPropertyOpt in "
208+
<< Loop->getLoopPreheader()->getParent()->getName() << "\n");
209+
LLVM_DEBUG(Loop->dump());
210+
return true;
130211
}
131212

132213
private:

0 commit comments

Comments
 (0)