Skip to content

Commit 5a23d31

Browse files
authored
[Sample Profile] Check hot callsite threshold when inlining a function with a sample profile (#93286)
Currently, if a callsite is hot as determined by the sample profile, it is unconditionally inlined barring invalid cases (such as recursion). The inline cost check should still apply, because a function's hotness and its inline cost are two different things. For example, if a function calls another very large function multiple times (on different code paths), the large function should not be inlined even if it is hot.
1 parent 193e900 commit 5a23d31

File tree

5 files changed

+71
-6
lines changed

5 files changed

+71
-6
lines changed

llvm/lib/Transforms/IPO/SampleProfile.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,10 +1391,11 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
13911391
return InlineCost::getAlways("preinliner");
13921392
}
13931393

1394-
// For old FDO inliner, we inline the call site as long as cost is not
1395-
// "Never". The cost-benefit check is done earlier.
1394+
// For old FDO inliner, we inline the call site only if its cost is below the hot threshold,
1395+
// even if the function is hot based on sample profile data. This is to
1396+
// prevent huge functions from being inlined.
13961397
if (!CallsitePrioritizedInline) {
1397-
return InlineCost::get(Cost.getCost(), INT_MAX);
1398+
return InlineCost::get(Cost.getCost(), SampleHotCallSiteThreshold);
13981399
}
13991400

14001401
// Otherwise only use the cost from call analyzer, but overwrite threshold with
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
foo:100:100
2+
1: bar:100
3+
1:100
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=100 2>&1 | FileCheck %s
2+
3+
; CHECK: remark: a.cc:6:12: 'bar' inlined into 'foo' to match profiling context with (cost={{.*}}, threshold=100)
4+
; CHECK: define dso_local noundef i32 @foo(i32 noundef %0)
5+
; CHECK-NOT: %2 = tail call noundef i32 @bar(i32 noundef %0)
6+
; CHECK-NEXT: %2 = icmp sgt i32 %0, 1
7+
; CHECK-NEXT: br i1 %2, label %3, label %bar.exit
8+
9+
; Manually lower cost threshold for hot function inlining, so that the function
10+
; is not inlined even if the profile indicates it is hot.
11+
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=1 2>&1 | FileCheck %s --check-prefix=COST
12+
13+
; COST-NOT: remark
14+
; COST: define dso_local noundef i32 @foo(i32 noundef %0)
15+
; COST-NEXT: %2 = tail call noundef i32 @bar(i32 noundef %0)
16+
17+
define dso_local noundef i32 @bar(i32 noundef %0) #0 !dbg !10 {
18+
%2 = icmp sgt i32 %0, 1
19+
br i1 %2, label %3, label %15
20+
3: ; preds = %1
21+
%4 = add nsw i32 %0, -2
22+
%5 = mul i32 %4, %4
23+
%6 = add i32 %5, %0
24+
%7 = zext nneg i32 %4 to i33
25+
%8 = add nsw i32 %0, -3
26+
%9 = zext i32 %8 to i33
27+
%10 = mul i33 %7, %9
28+
%11 = lshr i33 %10, 1
29+
%12 = trunc nuw i33 %11 to i32
30+
%13 = xor i32 %12, -1
31+
%14 = add i32 %6, %13
32+
br label %15
33+
15: ; preds = %3, %1
34+
%16 = phi i32 [ 0, %1 ], [ %14, %3 ]
35+
ret i32 %16
36+
}
37+
38+
define dso_local noundef i32 @foo(i32 noundef %0) #1 !dbg !20 {
39+
%2 = tail call noundef i32 @bar(i32 noundef %0), !dbg !24
40+
ret i32 %2
41+
}
42+
43+
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" }
44+
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" }
45+
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
46+
47+
!llvm.dbg.cu = !{!0}
48+
!llvm.module.flags = !{!2, !3}
49+
50+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug)
51+
!1 = !DIFile(filename: "a.cc", directory: ".")
52+
!2 = !{i32 2, !"Dwarf Version", i32 4}
53+
!3 = !{i32 2, !"Debug Info Version", i32 3}
54+
!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 1, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
55+
!11 = !DIFile(filename: "a.cc", directory: ".")
56+
!12 = !DISubroutineType(types: !13)
57+
!13 = !{!14, !14}
58+
!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
59+
!20 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !11, file: !11, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
60+
!23 = !DILocation(line: 0, scope: !20)
61+
!24 = !DILocation(line: 6, column: 12, scope: !20)

llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ if.end:
9898
;YAML-NEXT: - String: '(cost='
9999
;YAML-NEXT: - Cost: '15'
100100
;YAML-NEXT: - String: ', threshold='
101-
;YAML-NEXT: - Threshold: '2147483647'
101+
;YAML-NEXT: - Threshold: '3000'
102102
;YAML-NEXT: - String: ')'
103103
;YAML-NEXT: - String: ' at callsite '
104104
;YAML-NEXT: - String: foo

llvm/test/Transforms/SampleProfile/remarks.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
; We are expecting foo() to be inlined in main() (almost all the cycles are
2424
; spent inside foo).
25-
; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21;
25+
; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=3000) at callsite main:0:21;
2626
; CHECK: remark: remarks.cc:9:19: 'rand' inlined into 'main' to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21;
2727

2828
; The back edge for the loop is the hottest edge in the loop subgraph.
@@ -51,7 +51,7 @@
5151
;YAML-NEXT: - String: '(cost='
5252
;YAML-NEXT: - Cost: '130'
5353
;YAML-NEXT: - String: ', threshold='
54-
;YAML-NEXT: - Threshold: '2147483647'
54+
;YAML-NEXT: - Threshold: '3000'
5555
;YAML-NEXT: - String: ')'
5656
;YAML-NEXT: - String: ' at callsite '
5757
;YAML-NEXT: - String: main

0 commit comments

Comments
 (0)