Skip to content

Commit 5d79110

Browse files
authored
[Pipelines] Perform mergefunc after constmerge (#92498)
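ConstantMerge can fold duplicate switch jump tables, which may make functions identical again and so gives MergeFunctions more to merge. The reverse, MergeFunctions creating new opportunities for ConstantMerge, seems unlikely, so MergeFunctions now runs after ConstantMerge. Fixes #92201.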
1 parent b52c512 commit 5d79110

File tree

4 files changed

+141
-5
lines changed

4 files changed

+141
-5
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,17 +1527,18 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
15271527
if (EnableIROutliner)
15281528
MPM.addPass(IROutlinerPass());
15291529

1530-
// Merge functions if requested.
1531-
if (PTO.MergeFunctions)
1532-
MPM.addPass(MergeFunctionsPass());
1533-
15341530
// Now we need to do some global optimization transforms.
15351531
// FIXME: It would seem like these should come first in the optimization
15361532
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
15371533
// ordering here.
15381534
MPM.addPass(GlobalDCEPass());
15391535
MPM.addPass(ConstantMergePass());
15401536

1537+
// Merge functions if requested. It has a better chance to merge functions
1538+
// after ConstantMerge folded jump tables.
1539+
if (PTO.MergeFunctions)
1540+
MPM.addPass(MergeFunctionsPass());
1541+
15411542
if (PTO.CallGraphProfile && !LTOPreLink)
15421543
MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
15431544
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,9 +281,9 @@
281281
; CHECK-HOT-COLD-SPLIT-NEXT: Running pass: HotColdSplittingPass
282282
; CHECK-IR-OUTLINER-NEXT: Running pass: IROutlinerPass
283283
; CHECK-IR-OUTLINER-NEXT: Running analysis: IRSimilarityAnalysis
284-
; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
285284
; CHECK-O-NEXT: Running pass: GlobalDCEPass
286285
; CHECK-O-NEXT: Running pass: ConstantMergePass
286+
; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
287287
; CHECK-DEFAULT-NEXT: Running pass: CGProfilePass
288288
; CHECK-DEFAULT-NEXT: Running pass: RelLookupTableConverterPass
289289
; CHECK-LTO-NOT: Running pass: RelLookupTableConverterPass
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
3+
4+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-apple-macosx12.0.0"
6+
7+
define i32 @f(i32 noundef %x) {
8+
; CHECK-LABEL: define range(i32 0, 2) i32 @f(
9+
; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[ENTRY:.*]]:
11+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 8
12+
; CHECK-NEXT: br i1 [[TMP0]], label %[[SWITCH_LOOKUP:.*]], label %[[SW_EPILOG:.*]]
13+
; CHECK: [[SWITCH_LOOKUP]]:
14+
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[X]] to i64
15+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
16+
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
17+
; CHECK-NEXT: br label %[[SW_EPILOG]]
18+
; CHECK: [[SW_EPILOG]]:
19+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ], [ 0, %[[ENTRY]] ]
20+
; CHECK-NEXT: ret i32 [[X_ADDR_0]]
21+
;
22+
entry:
23+
%x.addr = alloca i32, align 4
24+
store i32 %x, ptr %x.addr, align 4
25+
%0 = load i32, ptr %x.addr, align 4
26+
switch i32 %0, label %sw.default [
27+
i32 0, label %sw.bb
28+
i32 2, label %sw.bb
29+
i32 4, label %sw.bb
30+
i32 6, label %sw.bb
31+
i32 7, label %sw.bb
32+
]
33+
34+
sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry
35+
store i32 1, ptr %x.addr, align 4
36+
br label %sw.epilog
37+
38+
sw.default: ; preds = %entry
39+
store i32 0, ptr %x.addr, align 4
40+
br label %sw.epilog
41+
42+
sw.epilog: ; preds = %sw.default, %sw.bb
43+
%1 = load i32, ptr %x.addr, align 4
44+
ret i32 %1
45+
}
46+
47+
define i32 @g(i32 noundef %x) {
48+
; CHECK-LABEL: define range(i32 0, 2) i32 @g(
49+
; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
50+
; CHECK-NEXT: [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]]
51+
; CHECK-NEXT: ret i32 [[TMP2]]
52+
;
53+
entry:
54+
%x.addr = alloca i32, align 4
55+
store i32 %x, ptr %x.addr, align 4
56+
%0 = load i32, ptr %x.addr, align 4
57+
switch i32 %0, label %sw.default [
58+
i32 0, label %sw.bb
59+
i32 2, label %sw.bb
60+
i32 4, label %sw.bb
61+
i32 6, label %sw.bb
62+
i32 7, label %sw.bb
63+
]
64+
65+
sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry
66+
store i32 1, ptr %x.addr, align 4
67+
br label %sw.epilog
68+
69+
sw.default: ; preds = %entry
70+
store i32 0, ptr %x.addr, align 4
71+
br label %sw.epilog
72+
73+
sw.epilog: ; preds = %sw.default, %sw.bb
74+
%1 = load i32, ptr %x.addr, align 4
75+
ret i32 %1
76+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
3+
4+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-apple-macosx12.0.0"
6+
7+
@switch.table.f = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
8+
@switch.table.g = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
9+
10+
define range(i32 0, 2) i32 @f(i32 noundef %x) local_unnamed_addr {
11+
; CHECK-LABEL: define range(i32 0, 2) i32 @f(
12+
; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
13+
; CHECK-NEXT: [[ENTRY:.*]]:
14+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 8
15+
; CHECK-NEXT: br i1 [[TMP0]], label %[[SWITCH_LOOKUP:.*]], label %[[SW_EPILOG:.*]]
16+
; CHECK: [[SWITCH_LOOKUP]]:
17+
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[X]] to i64
18+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
19+
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
20+
; CHECK-NEXT: br label %[[SW_EPILOG]]
21+
; CHECK: [[SW_EPILOG]]:
22+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ], [ 0, %[[ENTRY]] ]
23+
; CHECK-NEXT: ret i32 [[X_ADDR_0]]
24+
;
25+
entry:
26+
%0 = icmp ult i32 %x, 8
27+
br i1 %0, label %switch.lookup, label %sw.epilog
28+
29+
switch.lookup: ; preds = %entry
30+
%1 = zext nneg i32 %x to i64
31+
%switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.f, i64 0, i64 %1
32+
%switch.load = load i32, ptr %switch.gep, align 4
33+
br label %sw.epilog
34+
35+
sw.epilog: ; preds = %entry, %switch.lookup
36+
%x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
37+
ret i32 %x.addr.0
38+
}
39+
40+
define range(i32 0, 2) i32 @g(i32 noundef %x) local_unnamed_addr {
41+
; CHECK-LABEL: define range(i32 0, 2) i32 @g(
42+
; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
43+
; CHECK-NEXT: [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]]
44+
; CHECK-NEXT: ret i32 [[TMP2]]
45+
;
46+
entry:
47+
%0 = icmp ult i32 %x, 8
48+
br i1 %0, label %switch.lookup, label %sw.epilog
49+
50+
switch.lookup: ; preds = %entry
51+
%1 = zext nneg i32 %x to i64
52+
%switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 %1
53+
%switch.load = load i32, ptr %switch.gep, align 4
54+
br label %sw.epilog
55+
56+
sw.epilog: ; preds = %entry, %switch.lookup
57+
%x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
58+
ret i32 %x.addr.0
59+
}

0 commit comments

Comments
 (0)