Skip to content

Commit 2079798

Browse files
committed
Outline non returning functions unless a longjmp
__assert_fail, abort, exit, etc. are cold. TODO: outline throw.
Authored by: rjf (Ruijie Fang)
Reviewed by: hiraditya, tejohnson, fhahn
Differential Revision: https://reviews.llvm.org/D69257
1 parent 6043498 commit 2079798

File tree

6 files changed

+528
-5
lines changed

6 files changed

+528
-5
lines changed

llvm/lib/Transforms/IPO/HotColdSplitting.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ bool blockEndsInUnreachable(const BasicBlock &BB) {
101101
return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
102102
}
103103

104-
bool unlikelyExecuted(BasicBlock &BB) {
104+
bool unlikelyExecuted(BasicBlock &BB, ProfileSummaryInfo *PSI,
105+
BlockFrequencyInfo *BFI) {
105106
// Exception handling blocks are unlikely executed.
106107
if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
107108
return true;
@@ -114,12 +115,19 @@ bool unlikelyExecuted(BasicBlock &BB) {
114115
return true;
115116

116117
// The block is cold if it has an unreachable terminator, unless it's
117-
// preceded by a call to a (possibly warm) noreturn call (e.g. longjmp).
118+
// preceded by a call to a (possibly warm) noreturn call (e.g. longjmp);
119+
// in the case of a longjmp, if the block is cold according to
120+
// profile information, we mark it as unlikely to be executed as well.
118121
if (blockEndsInUnreachable(BB)) {
119122
if (auto *CI =
120123
dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
121-
if (CI->hasFnAttr(Attribute::NoReturn))
122-
return false;
124+
if (CI->hasFnAttr(Attribute::NoReturn)) {
125+
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
126+
return (II->getIntrinsicID() != Intrinsic::eh_sjlj_longjmp) ||
127+
(BFI && PSI->isColdBlock(&BB, BFI));
128+
return !CI->getCalledFunction()->getName().contains("longjmp") ||
129+
(BFI && PSI->isColdBlock(&BB, BFI));
130+
}
123131
return true;
124132
}
125133

@@ -575,7 +583,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
575583
continue;
576584

577585
bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
578-
(EnableStaticAnalyis && unlikelyExecuted(*BB));
586+
(EnableStaticAnalyis && unlikelyExecuted(*BB, PSI, BFI));
579587
if (!Cold)
580588
continue;
581589

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
2+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
3+
target triple = "x86_64-unknown-linux-gnu"
4+
5+
%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t }
6+
%struct.__sigset_t = type { [16 x i64] }
7+
8+
@c = dso_local global i32 1, align 4
9+
@buf = dso_local global [20 x i8*] zeroinitializer, align 16
10+
11+
; CHECK-LABEL: @f
12+
; CHECK-NOT: f.cold.1
13+
define dso_local void @f() #0 {
14+
entry:
15+
%i = alloca i32, align 4
16+
%j = alloca i32, align 4
17+
%k = alloca i32, align 4
18+
%0 = load i32, i32* @c, align 4
19+
%tobool = icmp ne i32 %0, 0
20+
br i1 %tobool, label %if.then, label %if.else
21+
22+
if.then: ; preds = %entry
23+
ret void
24+
25+
if.else: ; preds = %entry
26+
%1 = load i32, i32* @c, align 4
27+
%inc = add i32 %1, 1
28+
store i32 %inc, i32* @c, align 4
29+
%2 = load i32, i32* @c, align 4
30+
%inc1 = add i32 %2, 1
31+
store i32 %inc1, i32* @c, align 4
32+
%3 = load i32, i32* @c, align 4
33+
%inc2 = add i32 %3, 1
34+
store i32 %inc2, i32* @c, align 4
35+
%4 = load i32, i32* @c, align 4
36+
%inc3 = add i32 %4, 1
37+
store i32 %inc3, i32* @c, align 4
38+
%5 = load i32, i32* @c, align 4
39+
%dec = add i32 %5, -1
40+
store i32 %dec, i32* @c, align 4
41+
%6 = load i32, i32* @c, align 4
42+
%dec4 = add i32 %6, -1
43+
store i32 %dec4, i32* @c, align 4
44+
%7 = load i32, i32* @c, align 4
45+
%inc5 = add i32 %7, 1
46+
store i32 %inc5, i32* @c, align 4
47+
%8 = load i32, i32* @c, align 4
48+
%inc6 = add i32 %8, 1
49+
store i32 %inc6, i32* @c, align 4
50+
%9 = load i32, i32* @c, align 4
51+
%add = add i32 %9, 1
52+
store i32 %add, i32* %i, align 4
53+
%10 = load i32, i32* %i, align 4
54+
%sub = sub i32 %10, 1
55+
store i32 %sub, i32* %j, align 4
56+
%11 = load i32, i32* %i, align 4
57+
%add7 = add i32 %11, 2
58+
store i32 %add7, i32* %k, align 4
59+
call void @longjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*), i32 1) #3
60+
unreachable
61+
}
62+
63+
declare dso_local void @longjmp(%struct.__jmp_buf_tag*, i32) #1
64+
65+
; CHECK-LABEL: @main
66+
; CHECK-NOT: main.cold.1
67+
define dso_local i32 @main() #0 {
68+
entry:
69+
%retval = alloca i32, align 4
70+
%i = alloca i32, align 4
71+
store i32 0, i32* %retval, align 4
72+
store i32 0, i32* %i, align 4
73+
%call = call i32 @_setjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*)) #4
74+
%tobool = icmp ne i32 %call, 0
75+
br i1 %tobool, label %if.then, label %if.end
76+
77+
if.then: ; preds = %entry
78+
store i32 1, i32* %retval, align 4
79+
br label %return
80+
81+
if.end: ; preds = %entry
82+
call void @f()
83+
store i32 0, i32* %retval, align 4
84+
br label %return
85+
86+
return: ; preds = %if.end, %if.then
87+
%0 = load i32, i32* %retval, align 4
88+
ret i32 %0
89+
}
90+
91+
declare dso_local i32 @_setjmp(%struct.__jmp_buf_tag*) #2
92+
93+
attributes #0 = { nounwind uwtable }
94+
attributes #1 = { noreturn nounwind }
95+
attributes #2 = { nounwind returns_twice }
96+
attributes #3 = { noreturn nounwind }
97+
attributes #4 = { nounwind returns_twice }
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; RUN: opt -profile-summary-cold-count=0 -hotcoldsplit -S < %s | FileCheck %s
2+
3+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t }
7+
%struct.__sigset_t = type { [16 x i64] }
8+
9+
@c = dso_local global i32 1, align 4
10+
@buf = dso_local global [20 x i8*] zeroinitializer, align 16
11+
12+
; CHECK-LABEL: @f
13+
; CHECK: f.cold.1
14+
define dso_local void @f() #0 !prof !31 {
15+
entry:
16+
%i = alloca i32, align 4
17+
%j = alloca i32, align 4
18+
%k = alloca i32, align 4
19+
%0 = load i32, i32* @c, align 4
20+
%tobool = icmp ne i32 %0, 0
21+
br i1 %tobool, label %if.then, label %if.else, !prof !32
22+
23+
if.then: ; preds = %entry
24+
ret void
25+
26+
if.else: ; preds = %entry
27+
%1 = load i32, i32* @c, align 4
28+
%inc = add i32 %1, 1
29+
store i32 %inc, i32* @c, align 4
30+
%2 = load i32, i32* @c, align 4
31+
%inc1 = add i32 %2, 1
32+
store i32 %inc1, i32* @c, align 4
33+
%3 = load i32, i32* @c, align 4
34+
%inc2 = add i32 %3, 1
35+
store i32 %inc2, i32* @c, align 4
36+
%4 = load i32, i32* @c, align 4
37+
%inc3 = add i32 %4, 1
38+
store i32 %inc3, i32* @c, align 4
39+
%5 = load i32, i32* @c, align 4
40+
%dec = add i32 %5, -1
41+
store i32 %dec, i32* @c, align 4
42+
%6 = load i32, i32* @c, align 4
43+
%dec4 = add i32 %6, -1
44+
store i32 %dec4, i32* @c, align 4
45+
%7 = load i32, i32* @c, align 4
46+
%inc5 = add i32 %7, 1
47+
store i32 %inc5, i32* @c, align 4
48+
%8 = load i32, i32* @c, align 4
49+
%inc6 = add i32 %8, 1
50+
store i32 %inc6, i32* @c, align 4
51+
%9 = load i32, i32* @c, align 4
52+
%add = add i32 %9, 1
53+
store i32 %add, i32* %i, align 4
54+
%10 = load i32, i32* %i, align 4
55+
%sub = sub i32 %10, 1
56+
store i32 %sub, i32* %j, align 4
57+
%11 = load i32, i32* %i, align 4
58+
%add7 = add i32 %11, 2
59+
store i32 %add7, i32* %k, align 4
60+
call void @longjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*), i32 1) #3
61+
unreachable
62+
}
63+
64+
declare dso_local void @longjmp(%struct.__jmp_buf_tag*, i32) #1
65+
66+
define dso_local i32 @main() #0 !prof !31 {
67+
entry:
68+
%retval = alloca i32, align 4
69+
%i = alloca i32, align 4
70+
store i32 0, i32* %retval, align 4
71+
store i32 0, i32* %i, align 4
72+
%call = call i32 @_setjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*)) #4
73+
%tobool = icmp ne i32 %call, 0
74+
br i1 %tobool, label %if.then, label %if.end, !prof !33
75+
76+
if.then: ; preds = %entry
77+
store i32 1, i32* %retval, align 4
78+
br label %return
79+
80+
if.end: ; preds = %entry
81+
call void @f()
82+
store i32 0, i32* %retval, align 4
83+
br label %return
84+
85+
return: ; preds = %if.end, %if.then
86+
%0 = load i32, i32* %retval, align 4
87+
ret i32 %0
88+
}
89+
90+
declare dso_local i32 @_setjmp(%struct.__jmp_buf_tag*) #2
91+
92+
attributes #0 = { inlinehint nounwind uwtable }
93+
attributes #1 = { noreturn nounwind }
94+
attributes #2 = { nounwind returns_twice }
95+
attributes #3 = { noreturn nounwind }
96+
attributes #4 = { nounwind returns_twice }
97+
98+
!llvm.module.flags = !{!0, !1}
99+
100+
!0 = !{i32 1, !"wchar_size", i32 4}
101+
!1 = !{i32 1, !"ProfileSummary", !2}
102+
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
103+
!3 = !{!"ProfileFormat", !"InstrProf"}
104+
!4 = !{!"TotalCount", i64 2}
105+
!5 = !{!"MaxCount", i64 1}
106+
!6 = !{!"MaxInternalCount", i64 1}
107+
!7 = !{!"MaxFunctionCount", i64 1}
108+
!8 = !{!"NumCounts", i64 4}
109+
!9 = !{!"NumFunctions", i64 2}
110+
!10 = !{!"IsPartialProfile", i64 0}
111+
!11 = !{!"PartialProfileRatio", double 0.000000e+00}
112+
!12 = !{!"DetailedSummary", !13}
113+
!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29}
114+
!14 = !{i32 10000, i64 0, i32 0}
115+
!15 = !{i32 100000, i64 0, i32 0}
116+
!16 = !{i32 200000, i64 0, i32 0}
117+
!17 = !{i32 300000, i64 0, i32 0}
118+
!18 = !{i32 400000, i64 0, i32 0}
119+
!19 = !{i32 500000, i64 1, i32 2}
120+
!20 = !{i32 600000, i64 1, i32 2}
121+
!21 = !{i32 700000, i64 1, i32 2}
122+
!22 = !{i32 800000, i64 1, i32 2}
123+
!23 = !{i32 900000, i64 1, i32 2}
124+
!24 = !{i32 950000, i64 1, i32 2}
125+
!25 = !{i32 990000, i64 1, i32 2}
126+
!26 = !{i32 999000, i64 1, i32 2}
127+
!27 = !{i32 999900, i64 1, i32 2}
128+
!28 = !{i32 999990, i64 1, i32 2}
129+
!29 = !{i32 999999, i64 1, i32 2}
130+
!31 = !{!"function_entry_count", i64 1}
131+
!32 = !{!"branch_weights", i32 1, i32 0}
132+
!33 = !{!"branch_weights", i32 0, i32 1}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
2+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
3+
target triple = "x86_64-unknown-linux-gnu"
4+
5+
@c = dso_local global i32 1, align 4
6+
@buf = dso_local global [20 x i8*] zeroinitializer, align 16
7+
8+
; CHECK-LABEL: @f
9+
; CHECK-NOT: f.cold.1
10+
define dso_local void @f() #0 {
11+
entry:
12+
%i = alloca i32, align 4
13+
%j = alloca i32, align 4
14+
%k = alloca i32, align 4
15+
%0 = load i32, i32* @c, align 4
16+
%tobool = icmp ne i32 %0, 0
17+
br i1 %tobool, label %if.then, label %if.else
18+
19+
if.then: ; preds = %entry
20+
ret void
21+
22+
if.else: ; preds = %entry
23+
%1 = load i32, i32* @c, align 4
24+
%inc = add nsw i32 %1, 1
25+
store i32 %inc, i32* @c, align 4
26+
%2 = load i32, i32* @c, align 4
27+
%inc1 = add nsw i32 %2, 1
28+
store i32 %inc1, i32* @c, align 4
29+
%3 = load i32, i32* @c, align 4
30+
%inc2 = add nsw i32 %3, 1
31+
store i32 %inc2, i32* @c, align 4
32+
%4 = load i32, i32* @c, align 4
33+
%inc3 = add nsw i32 %4, 1
34+
store i32 %inc3, i32* @c, align 4
35+
%5 = load i32, i32* @c, align 4
36+
%dec = add nsw i32 %5, -1
37+
store i32 %dec, i32* @c, align 4
38+
%6 = load i32, i32* @c, align 4
39+
%dec4 = add nsw i32 %6, -1
40+
store i32 %dec4, i32* @c, align 4
41+
%7 = load i32, i32* @c, align 4
42+
%inc5 = add nsw i32 %7, 1
43+
store i32 %inc5, i32* @c, align 4
44+
%8 = load i32, i32* @c, align 4
45+
%inc6 = add nsw i32 %8, 1
46+
store i32 %inc6, i32* @c, align 4
47+
%9 = load i32, i32* @c, align 4
48+
%add = add nsw i32 %9, 1
49+
store i32 %add, i32* %i, align 4
50+
%10 = load i32, i32* %i, align 4
51+
%sub = sub nsw i32 %10, 1
52+
store i32 %sub, i32* %j, align 4
53+
%11 = load i32, i32* %i, align 4
54+
%add7 = add nsw i32 %11, 2
55+
store i32 %add7, i32* %k, align 4
56+
call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*))
57+
unreachable
58+
}
59+
60+
declare void @llvm.eh.sjlj.longjmp(i8*) #1
61+
62+
; CHECK-LABEL: @main
63+
; CHECK-NOT: main.cold.1
64+
define dso_local i32 @main() #0 {
65+
entry:
66+
%retval = alloca i32, align 4
67+
%i = alloca i32, align 4
68+
store i32 0, i32* %retval, align 4
69+
store i32 0, i32* %i, align 4
70+
%0 = call i8* @llvm.frameaddress.p0i8(i32 0)
71+
store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16
72+
%1 = call i8* @llvm.stacksave()
73+
store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16
74+
%2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*))
75+
%tobool = icmp ne i32 %2, 0
76+
br i1 %tobool, label %if.then, label %if.end
77+
78+
if.then: ; preds = %entry
79+
store i32 1, i32* %retval, align 4
80+
br label %return
81+
82+
if.end: ; preds = %entry
83+
call void @f()
84+
store i32 0, i32* %retval, align 4
85+
br label %return
86+
87+
return: ; preds = %if.end, %if.then
88+
%3 = load i32, i32* %retval, align 4
89+
ret i32 %3
90+
}
91+
92+
declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2
93+
94+
declare i8* @llvm.stacksave() #3
95+
96+
declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
97+
98+
attributes #0 = { nounwind uwtable }
99+
attributes #1 = { noreturn nounwind }
100+
attributes #2 = { nounwind readnone }
101+
attributes #3 = { nounwind }
102+
103+

0 commit comments

Comments
 (0)