Skip to content

Commit 386aa2a

Browse files
committed
[FuncSpec] Increase the maximum number of times the specializer can run.
* Changes the default value of FuncSpecMaxIters from 1 to 10. This allows specialization of recursive functions.
* Adds an option to control the maximum codesize growth per function.
* Measured ~45% performance uplift for SPEC2017:548.exchange2_r on AWS Graviton3.

Differential Revision: https://reviews.llvm.org/D145819
1 parent d82f0b7 commit 386aa2a

File tree

4 files changed

+78
-3
lines changed

4 files changed

+78
-3
lines changed

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ class FunctionSpecializer {
225225
SmallPtrSet<Function *, 32> Specializations;
226226
SmallPtrSet<Function *, 32> FullySpecialized;
227227
DenseMap<Function *, CodeMetrics> FunctionMetrics;
228+
DenseMap<Function *, unsigned> FunctionGrowth;
228229

229230
public:
230231
FunctionSpecializer(

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ static cl::opt<unsigned> MinFunctionSize(
9393
"Don't specialize functions that have less than this number of "
9494
"instructions"));
9595

96+
static cl::opt<unsigned> MaxCodeSizeGrowth(
97+
"funcspec-max-codesize-growth", cl::init(3), cl::Hidden, cl::desc(
98+
"Maximum codesize growth allowed per function"));
99+
96100
static cl::opt<unsigned> MinCodeSizeSavings(
97101
"funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc(
98102
"Reject specializations whose codesize savings are less than this"
@@ -841,7 +845,10 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
841845
<< B.CodeSize << ", Latency = " << B.Latency
842846
<< ", Inlining = " << Score << "}\n");
843847

844-
auto IsProfitable = [&FuncSize](Bonus &B, unsigned Score) -> bool {
848+
FunctionGrowth[F] += FuncSize - B.CodeSize;
849+
850+
auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
851+
unsigned FuncGrowth) -> bool {
845852
// No check required.
846853
if (ForceSpecialization)
847854
return true;
@@ -854,11 +861,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
854861
// Minimum latency savings.
855862
if (B.Latency < MinLatencySavings * FuncSize / 100)
856863
return false;
864+
// Maximum codesize growth.
865+
if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
866+
return false;
857867
return true;
858868
};
859869

860870
// Discard unprofitable specialisations.
861-
if (!IsProfitable(B, Score))
871+
if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
862872
continue;
863873

864874
// Create a new specialisation entry.

llvm/lib/Transforms/IPO/SCCP.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ STATISTIC(NumInstReplaced,
4343
"Number of instructions replaced with (simpler) instruction");
4444

4545
static cl::opt<unsigned> FuncSpecMaxIters(
46-
"funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc(
46+
"funcspec-max-iters", cl::init(10), cl::Hidden, cl::desc(
4747
"The maximum number of iterations function specialization is run"));
4848

4949
static void findReturnsToZap(Function &F,
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine,simplifycfg" -S \
3+
; RUN: -funcspec-min-function-size=23 -funcspec-max-iters=100 \
4+
; RUN: -debug-only=function-specialization < %s 2>&1 | FileCheck %s
5+
6+
; Make sure the number of specializations created is not
7+
; linear in the number of iterations (funcspec-max-iters).
8+
9+
; CHECK: FnSpecialization: Created 4 specializations in module
10+
11+
@Global = internal constant i32 1, align 4
12+
13+
define internal void @recursiveFunc(ptr readonly %arg) {
14+
%temp = alloca i32, align 4
15+
%arg.load = load i32, ptr %arg, align 4
16+
%arg.cmp = icmp slt i32 %arg.load, 10000
17+
br i1 %arg.cmp, label %loop1, label %ret.block
18+
19+
loop1:
20+
br label %loop2
21+
22+
loop2:
23+
br label %loop3
24+
25+
loop3:
26+
br label %loop4
27+
28+
loop4:
29+
br label %block6
30+
31+
block6:
32+
call void @print_val(i32 %arg.load)
33+
%arg.add = add nsw i32 %arg.load, 1
34+
store i32 %arg.add, ptr %temp, align 4
35+
call void @recursiveFunc(ptr %temp)
36+
br label %loop4.end
37+
38+
loop4.end:
39+
%exit_cond1 = call i1 @exit_cond()
40+
br i1 %exit_cond1, label %loop4, label %loop3.end
41+
42+
loop3.end:
43+
%exit_cond2 = call i1 @exit_cond()
44+
br i1 %exit_cond2, label %loop3, label %loop2.end
45+
46+
loop2.end:
47+
%exit_cond3 = call i1 @exit_cond()
48+
br i1 %exit_cond3, label %loop2, label %loop1.end
49+
50+
loop1.end:
51+
%exit_cond4 = call i1 @exit_cond()
52+
br i1 %exit_cond4, label %loop1, label %ret.block
53+
54+
ret.block:
55+
ret void
56+
}
57+
58+
define i32 @main() {
59+
call void @recursiveFunc(ptr @Global)
60+
ret i32 0
61+
}
62+
63+
declare dso_local void @print_val(i32)
64+
declare dso_local i1 @exit_cond()

0 commit comments

Comments
 (0)