Skip to content

Commit 154cd6d

Browse files
committed
[SampleFDO] Fix invalid branch profile generated by indirect call promotion.
Suppose an inline instance has hot total sample count but 0 entry count, and it is an indirect call target. If the indirect call has no other call target and inline instance associated with it and it is promoted, currently the conditional branch generated by indirect call promotion will have invalid branch profile which is !{!"branch_weights", i32 0, i32 0} -- because the entry count of the promoted target is 0 and the total entry count of all targets is also 0. This caused a SEGV in Control Height Reduction and may cause problem in other passes. Function entry count of an inline instance is computed by a heuristic -- using either the sample of the starting line or starting inner inline instance. The patch changes the heuristic a little bit so that when total sample count is larger than 0, the computed entry count will be at least 1. Then the new branch profile will be !{!"branch_weights", i32 1, i32 0}. Differential Revision: https://reviews.llvm.org/D72790
1 parent e445447 commit 154cd6d

File tree

5 files changed

+35
-10
lines changed

5 files changed

+35
-10
lines changed

llvm/include/llvm/ProfileData/SampleProf.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -416,21 +416,21 @@ class FunctionSamples {
416416
/// Return the sample count of the first instruction of the function.
417417
/// The function can be either a standalone symbol or an inlined function.
418418
uint64_t getEntrySamples() const {
419+
uint64_t Count = 0;
419420
// Use either BodySamples or CallsiteSamples which ever has the smaller
420421
// lineno.
421422
if (!BodySamples.empty() &&
422423
(CallsiteSamples.empty() ||
423424
BodySamples.begin()->first < CallsiteSamples.begin()->first))
424-
return BodySamples.begin()->second.getSamples();
425-
if (!CallsiteSamples.empty()) {
426-
uint64_t T = 0;
425+
Count = BodySamples.begin()->second.getSamples();
426+
else if (!CallsiteSamples.empty()) {
427427
// An indirect callsite may be promoted to several inlined direct calls.
428428
// We need to get the sum of them.
429429
for (const auto &N_FS : CallsiteSamples.begin()->second)
430-
T += N_FS.second.getEntrySamples();
431-
return T;
430+
Count += N_FS.second.getEntrySamples();
432431
}
433-
return 0;
432+
// Return at least 1 if total sample is not 0.
433+
return Count ? Count : TotalSamples > 0;
434434
}
435435

436436
/// Return all the samples collected in the body of the function.
Binary file not shown.

llvm/test/Transforms/SampleProfile/Inputs/indirect-call.prof

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,8 @@ return_arg_caller:3000:0
2929
11: 3000
3030
2: return_arg:3000
3131
1: 3000
32+
branch_prof_valid:4000:0
33+
1: 1000
34+
2: foo_inline3:3000
35+
1: 0
36+
2: 3000

llvm/test/Transforms/SampleProfile/indirect-call.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,19 @@ else:
121121
ret i32* null
122122
}
123123

124+
; CHECK-LABEL: @branch_prof_valid
125+
; Check the conditional branch generated by indirect call promotion won't
126+
; have invalid profile like !{!"branch_weights", i32 0, i32 0}.
127+
define void @branch_prof_valid(void ()* %t0) !dbg !33 {
128+
%t1 = alloca void ()*
129+
store void ()* %t0, void ()** %t1
130+
%t2 = load void ()*, void ()** %t1
131+
; CHECK-NOT: call {{.*}}
132+
; CHECK: br i1 {{.*}}, label %if.true.direct_targ, label %if.false.orig_indirect, {{.*}}, !prof ![[BR3:[0-9]+]]
133+
call void %t2(), !dbg !34
134+
ret void
135+
}
136+
124137
@x = global i32 0, align 4
125138
@y = global void ()* null, align 8
126139

@@ -148,6 +161,10 @@ define i32* @foo_inline2(i32* %x) !dbg !19 {
148161
ret i32* %x
149162
}
150163

164+
define void @foo_inline3() !dbg !35 {
165+
ret void
166+
}
167+
151168
define i32 @foo_noinline(i32 %x) !dbg !20 {
152169
ret i32 %x
153170
}
@@ -184,6 +201,7 @@ define void @test_direct() !dbg !22 {
184201
; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000}
185202
; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000}
186203
; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000}
204+
; CHECK: ![[BR3]] = !{!"branch_weights", i32 1, i32 0}
187205
!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0)
188206
!7 = !DILocation(line: 7, scope: !6)
189207
!8 = distinct !DISubprogram(name: "test_inline_strip", scope: !1, file: !1, line: 8, unit: !0)
@@ -211,3 +229,6 @@ define void @test_direct() !dbg !22 {
211229
!30 = distinct !DISubprogram(name: "return_arg_caller", scope: !1, file: !1, line: 11, unit: !0)
212230
!31 = !DILocation(line: 12, scope: !30)
213231
!32 = !DILocation(line: 13, scope: !30)
232+
!33 = distinct !DISubprogram(name: "branch_prof_valid", scope: !1, file: !1, line: 25, unit: !0)
233+
!34 = !DILocation(line: 27, scope: !33)
234+
!35 = distinct !DISubprogram(name: "foo_inline3", scope: !1, file: !1, line: 29, unit: !0)

llvm/test/Transforms/SampleProfile/inline-callee-update.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
@y = global i32* ()* null, align 8
66
@z = global i32* ()* null, align 8
77

8+
; CHECK: define i32* @sample_loader_inlinee() {{.*}} !prof ![[ENTRY:[0-9]+]]
89
define i32* @sample_loader_inlinee() !dbg !3 {
910
bb:
1011
%tmp = call i32* @direct_leaf_func(i32* null), !dbg !4
@@ -20,6 +21,7 @@ else: ; preds = %bb
2021
ret i32* null
2122
}
2223

24+
; CHECK: define i32* @cgscc_inlinee() {{.*}} !prof ![[ENTRY:[0-9]+]]
2325
define i32* @cgscc_inlinee() !dbg !6 {
2426
bb:
2527
%tmp = call i32* @direct_leaf_func(i32* null), !dbg !7
@@ -67,7 +69,4 @@ declare i32* @direct_leaf_func(i32*)
6769
!12 = !DILocation(line: 21, scope: !11)
6870

6971
; Make sure the ImportGUID stays with entry count metadata for ThinLTO-PreLink
70-
; CHECK: distinct !DISubprogram(name: "sample_loader_inlinee"
71-
; CHECK-NEXT: {!"function_entry_count", i64 1, i64 -9171813444624716006}
72-
; CHECK: distinct !DISubprogram(name: "cgscc_inlinee"
73-
; CHECK-NEXT: !{!"function_entry_count", i64 0, i64 -9171813444624716006}
72+
; CHECK: ![[ENTRY]] = !{!"function_entry_count", i64 1, i64 -9171813444624716006}

0 commit comments

Comments
 (0)