Skip to content

Commit b48450c

Browse files
authored
[MergeFuncs] Use sizeWithoutDebug to decide if we create a thunk (#68627)
I noticed that when we determine the size of the function to figure out if it's profitable, we include debug instructions, which can end up making functions larger than necessary.
1 parent b6043f9 commit b48450c

File tree

3 files changed

+137
-31
lines changed

3 files changed

+137
-31
lines changed

llvm/lib/Transforms/IPO/MergeFunctions.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ static bool canCreateThunkFor(Function *F) {
653653
// Don't merge tiny functions using a thunk, since it can just end up
654654
// making the function larger.
655655
if (F->size() == 1) {
656-
if (F->front().size() <= 2) {
656+
if (F->front().sizeWithoutDebug() < 2) {
657657
LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
658658
<< " is too small to bother creating a thunk for\n");
659659
return false;

llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll

Lines changed: 75 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
12
; RUN: opt -passes='default<O0>,mergefunc' -S -mergefunc-preserve-debug-info < %s | FileCheck %s --check-prefix=OPTIMIZATION_LEVEL_0
23
; RUN: opt -passes='default<O2>,mergefunc' -S -mergefunc-preserve-debug-info < %s | FileCheck %s --check-prefix=OPTIMIZATION_LEVEL_2
34

@@ -43,6 +44,48 @@
4344

4445
; Function Attrs: nounwind uwtable
4546
define i32 @maxA(i32 %x, i32 %y) !dbg !6 {
47+
; OPTIMIZATION_LEVEL_0-LABEL: define i32 @maxA
48+
; OPTIMIZATION_LEVEL_0-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) !dbg [[DBG6:![0-9]+]] {
49+
; OPTIMIZATION_LEVEL_0-NEXT: entry:
50+
; OPTIMIZATION_LEVEL_0-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
51+
; OPTIMIZATION_LEVEL_0-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
52+
; OPTIMIZATION_LEVEL_0-NEXT: [[I:%.*]] = alloca i32, align 4
53+
; OPTIMIZATION_LEVEL_0-NEXT: [[M:%.*]] = alloca i32, align 4
54+
; OPTIMIZATION_LEVEL_0-NEXT: [[J:%.*]] = alloca i32, align 4
55+
; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
56+
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[X_ADDR]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]]
57+
; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
58+
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[Y_ADDR]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]]
59+
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]]
60+
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[M]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]]
61+
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[J]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]]
62+
; OPTIMIZATION_LEVEL_0-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4, !dbg [[DBG21:![0-9]+]]
63+
; OPTIMIZATION_LEVEL_0-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4, !dbg [[DBG23:![0-9]+]]
64+
; OPTIMIZATION_LEVEL_0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]], !dbg [[DBG24:![0-9]+]]
65+
; OPTIMIZATION_LEVEL_0-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !dbg [[DBG25:![0-9]+]]
66+
; OPTIMIZATION_LEVEL_0: if.then:
67+
; OPTIMIZATION_LEVEL_0-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4, !dbg [[DBG26:![0-9]+]]
68+
; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[TMP2]], ptr [[M]], align 4, !dbg [[DBG27:![0-9]+]]
69+
; OPTIMIZATION_LEVEL_0-NEXT: br label [[IF_END:%.*]], !dbg [[DBG28:![0-9]+]]
70+
; OPTIMIZATION_LEVEL_0: if.else:
71+
; OPTIMIZATION_LEVEL_0-NEXT: [[TMP3:%.*]] = load i32, ptr [[Y_ADDR]], align 4, !dbg [[DBG29:![0-9]+]]
72+
; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[TMP3]], ptr [[M]], align 4, !dbg [[DBG30:![0-9]+]]
73+
; OPTIMIZATION_LEVEL_0-NEXT: br label [[IF_END]]
74+
; OPTIMIZATION_LEVEL_0: if.end:
75+
; OPTIMIZATION_LEVEL_0-NEXT: [[TMP4:%.*]] = load i32, ptr [[M]], align 4, !dbg [[DBG31:![0-9]+]]
76+
; OPTIMIZATION_LEVEL_0-NEXT: ret i32 [[TMP4]], !dbg [[DBG32:![0-9]+]]
77+
;
78+
; OPTIMIZATION_LEVEL_2-LABEL: define i32 @maxA
79+
; OPTIMIZATION_LEVEL_2-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
80+
; OPTIMIZATION_LEVEL_2-NEXT: entry:
81+
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]]
82+
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[Y]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]]
83+
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.declare(metadata ptr undef, metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]]
84+
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.declare(metadata ptr undef, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]]
85+
; OPTIMIZATION_LEVEL_2-NEXT: [[X_Y:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 [[Y]])
86+
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[X_Y]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]]
87+
; OPTIMIZATION_LEVEL_2-NEXT: ret i32 [[X_Y]], !dbg [[DBG19:![0-9]+]]
88+
;
4689
entry:
4790
%x.addr = alloca i32, align 4
4891
%y.addr = alloca i32, align 4
@@ -81,26 +124,27 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
81124

82125
; Function Attrs: nounwind uwtable
83126
define i32 @maxB(i32 %x, i32 %y) !dbg !34 {
127+
; OPTIMIZATION_LEVEL_0-LABEL: define i32 @maxB
128+
; OPTIMIZATION_LEVEL_0-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) !dbg [[DBG33:![0-9]+]] {
129+
; OPTIMIZATION_LEVEL_0-NEXT: entry:
130+
; OPTIMIZATION_LEVEL_0-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
131+
; OPTIMIZATION_LEVEL_0-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4
132+
; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
133+
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[X_ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]]
134+
; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4
135+
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[Y_ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37:![0-9]+]]
136+
; OPTIMIZATION_LEVEL_0-NEXT: [[TMP0:%.*]] = tail call i32 @maxA(i32 [[X]], i32 [[Y]]), !dbg [[DBG38:![0-9]+]]
137+
; OPTIMIZATION_LEVEL_0-NEXT: ret i32 [[TMP0]], !dbg [[DBG38]]
138+
;
139+
; OPTIMIZATION_LEVEL_2-LABEL: define i32 @maxB
140+
; OPTIMIZATION_LEVEL_2-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !dbg [[DBG20:![0-9]+]] {
141+
; OPTIMIZATION_LEVEL_2-NEXT: entry:
142+
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22:![0-9]+]]
143+
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[Y]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]]
144+
; OPTIMIZATION_LEVEL_2-NEXT: [[TMP0:%.*]] = tail call i32 @maxA(i32 [[X]], i32 [[Y]]) #[[ATTR0]], !dbg [[DBG24:![0-9]+]]
145+
; OPTIMIZATION_LEVEL_2-NEXT: ret i32 [[TMP0]], !dbg [[DBG24]]
146+
;
84147

85-
; OPTIMIZATION_LEVEL_0: define i32 @maxB(i32 %x, i32 %y)
86-
; OPTIMIZATION_LEVEL_0-NEXT: entry:
87-
; OPTIMIZATION_LEVEL_0-NEXT: %x.addr = alloca i32, align 4
88-
; OPTIMIZATION_LEVEL_0-NEXT: %y.addr = alloca i32, align 4
89-
; OPTIMIZATION_LEVEL_0-NEXT: store i32 %x, ptr %x.addr, align 4
90-
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr %x.addr, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
91-
; OPTIMIZATION_LEVEL_0-NEXT: store i32 %y, ptr %y.addr, align 4
92-
; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr %y.addr, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
93-
; OPTIMIZATION_LEVEL_0-NEXT: %0 = tail call i32 @maxA(i32 %x, i32 %y), !dbg !{{[0-9]+}}
94-
; OPTIMIZATION_LEVEL_0-NEXT: ret i32 %0, !dbg !{{[0-9]+}}
95-
; OPTIMIZATION_LEVEL_0-NEXT: }
96-
97-
; OPTIMIZATION_LEVEL_2: define i32 @maxB(i32 %x, i32 %y)
98-
; OPTIMIZATION_LEVEL_2-NEXT: entry:
99-
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %x, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
100-
; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %y, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
101-
; OPTIMIZATION_LEVEL_2-NEXT: %0 = tail call i32 @maxA(i32 %x, i32 %y) #{{[0-9]+}}, !dbg !{{[0-9]+}}
102-
; OPTIMIZATION_LEVEL_2-NEXT: ret i32 %0, !dbg !{{[0-9]+}}
103-
; OPTIMIZATION_LEVEL_2-NEXT: }
104148

105149
entry:
106150
%x.addr = alloca i32, align 4
@@ -137,18 +181,19 @@ if.end: ; preds = %if.else, %if.then
137181

138182
; Function Attrs: nounwind uwtable
139183
define void @f() !dbg !57 {
184+
; OPTIMIZATION_LEVEL_0-LABEL: define void @f
185+
; OPTIMIZATION_LEVEL_0-SAME: () !dbg [[DBG39:![0-9]+]] {
186+
; OPTIMIZATION_LEVEL_0-NEXT: entry:
187+
; OPTIMIZATION_LEVEL_0-NEXT: [[CALL:%.*]] = call i32 @maxA(i32 3, i32 4), !dbg [[DBG42:![0-9]+]]
188+
; OPTIMIZATION_LEVEL_0-NEXT: [[CALL1:%.*]] = call i32 @maxB(i32 1, i32 9), !dbg [[DBG43:![0-9]+]]
189+
; OPTIMIZATION_LEVEL_0-NEXT: ret void, !dbg [[DBG44:![0-9]+]]
190+
;
191+
; OPTIMIZATION_LEVEL_2-LABEL: define void @f
192+
; OPTIMIZATION_LEVEL_2-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] !dbg [[DBG25:![0-9]+]] {
193+
; OPTIMIZATION_LEVEL_2-NEXT: entry:
194+
; OPTIMIZATION_LEVEL_2-NEXT: ret void, !dbg [[DBG28:![0-9]+]]
195+
;
140196
entry:
141-
142-
; OPTIMIZATION_LEVEL_0: define void @f()
143-
; OPTIMIZATION_LEVEL_0-NEXT: entry:
144-
; OPTIMIZATION_LEVEL_0-NEXT: %call = call i32 @maxA(i32 3, i32 4), !dbg !{{[0-9]+}}
145-
; OPTIMIZATION_LEVEL_0-NEXT: %call1 = call i32 @maxB(i32 1, i32 9), !dbg !{{[0-9]+}}
146-
; OPTIMIZATION_LEVEL_0-NEXT: ret void, !dbg !{{[0-9]+}}
147-
148-
; OPTIMIZATION_LEVEL_2: define void @f()
149-
; OPTIMIZATION_LEVEL_2-NEXT: entry:
150-
; OPTIMIZATION_LEVEL_2-NEXT: ret void, !dbg !{{[0-9]+}}
151-
152197
%call = call i32 @maxA(i32 3, i32 4), !dbg !60
153198
%call1 = call i32 @maxB(i32 1, i32 9), !dbg !61
154199
ret void, !dbg !62
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2+
;; Make sure debug instructions are not counted when deciding to merge functions
3+
; RUN: opt -S -passes=mergefunc < %s | FileCheck %s
4+
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
6+
7+
; Function Attrs: nounwind readnone
8+
define hidden i32 @f(i32 %t) {
9+
; CHECK-LABEL: define hidden i32 @f
10+
; CHECK-SAME: (i32 [[T:%.*]]) {
11+
; CHECK-NEXT: entry:
12+
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]]
13+
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6]], metadata !DIExpression()), !dbg [[DBG12]]
14+
; CHECK-NEXT: ret i32 0
15+
;
16+
entry:
17+
call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata !DIExpression()), !dbg !13
18+
call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata !DIExpression()), !dbg !13
19+
ret i32 0
20+
}
21+
22+
; Function Attrs: nounwind readnone
23+
define hidden i32 @f_thunk(i32 %t) {
24+
; CHECK-LABEL: define hidden i32 @f_thunk
25+
; CHECK-SAME: (i32 [[T:%.*]]) {
26+
; CHECK-NEXT: entry:
27+
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6]], metadata !DIExpression()), !dbg [[DBG12]]
28+
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6]], metadata !DIExpression()), !dbg [[DBG12]]
29+
; CHECK-NEXT: ret i32 0
30+
;
31+
entry:
32+
call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata !DIExpression()), !dbg !13
33+
call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata !DIExpression()), !dbg !13
34+
ret i32 0
35+
}
36+
37+
; Function Attrs: nounwind readnone speculatable
38+
declare void @llvm.dbg.value(metadata, metadata, metadata)
39+
40+
!llvm.dbg.cu = !{!0}
41+
!llvm.module.flags = !{!3, !4, !5}
42+
43+
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
44+
!1 = !DIFile(filename: "no-merge-debug-thunks.c", directory: "/tmp")
45+
!2 = !{}
46+
!3 = !{i32 2, !"Dwarf Version", i32 4}
47+
!4 = !{i32 2, !"Debug Info Version", i32 3}
48+
!5 = !{i32 1, !"wchar_size", i32 4}
49+
!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !11)
50+
!8 = !DISubroutineType(types: !9)
51+
!9 = !{!10, !10}
52+
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
53+
!11 = !{!12}
54+
!12 = !DILocalVariable(name: "t", arg: 1, scope: !7, file: !1, line: 3, type: !10)
55+
!13 = !DILocation(line: 3, column: 14, scope: !7)
56+
!14 = !DILocation(line: 4, column: 12, scope: !7)
57+
!16 = distinct !DISubprogram(name: "_start", scope: !1, file: !1, line: 7, type: !17, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: true, unit: !0, retainedNodes: !2)
58+
!17 = !DISubroutineType(types: !18)
59+
!18 = !{!10}
60+
!19 = !DILocation(line: 8, column: 3, scope: !16)
61+
!20 = !DILocation(line: 9, column: 3, scope: !16)

0 commit comments

Comments
 (0)