Skip to content

Commit 2d9b6a0

Browse files
[GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable fixed-point iteration (#94291)
This adds an option to CombinerInfo to limit the number of iterations in the Combiner. This option is then used to disable fixed-point iteration for the AArch64O0PreLegalizerCombiner. The combines there are simple enough that the code quality impact should be minimal with the current heuristics (instructions are processed from top to bottom of the basic block, and new/changed instructions are added back to the worklist). Test changes are due to some instructions not being DCE'd, which has no actual impact because InstructionSelect performs DCE as well. Results on AArch64 CTMark at O0: -0.9% geomean compile-time (instruction count); no change in size..text for any of the benchmarks.
1 parent a5985ca commit 2d9b6a0

File tree

4 files changed

+71
-26
lines changed

4 files changed

+71
-26
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ struct CombinerInfo {
4949
bool EnableOptSize;
5050
/// Whether we're optimizing for minsize (-Oz).
5151
bool EnableMinSize;
52+
53+
/// The maximum number of times the Combiner will iterate over the
54+
/// MachineFunction. Setting this to 0 enables fixed-point iteration.
55+
unsigned MaxIterations = 0;
5256
};
5357
} // namespace llvm
5458

llvm/lib/CodeGen/GlobalISel/Combiner.cpp

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/CodeGen/GlobalISel/Combiner.h"
1414
#include "llvm/ADT/PostOrderIterator.h"
1515
#include "llvm/ADT/SetVector.h"
16+
#include "llvm/ADT/Statistic.h"
1617
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
1718
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
1819
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -27,6 +28,11 @@
2728

2829
using namespace llvm;
2930

31+
STATISTIC(NumOneIteration, "Number of functions with one iteration");
32+
STATISTIC(NumTwoIterations, "Number of functions with two iterations");
33+
STATISTIC(NumThreeOrMoreIterations,
34+
"Number of functions with three or more iterations");
35+
3036
namespace llvm {
3137
cl::OptionCategory GICombinerOptionCategory(
3238
"GlobalISel Combiner",
@@ -135,7 +141,11 @@ bool Combiner::combineMachineInstrs() {
135141
bool MFChanged = false;
136142
bool Changed;
137143

138-
do {
144+
unsigned Iteration = 0;
145+
while (true) {
146+
++Iteration;
147+
LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
148+
139149
WorkList.clear();
140150

141151
// Collect all instructions. Do a post order traversal for basic blocks and
@@ -166,7 +176,28 @@ bool Combiner::combineMachineInstrs() {
166176
WLObserver->reportFullyCreatedInstrs();
167177
}
168178
MFChanged |= Changed;
169-
} while (Changed);
179+
180+
if (!Changed) {
181+
LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #"
182+
<< Iteration << '\n');
183+
break;
184+
}
185+
// Iterate until a fixed-point is reached if MaxIterations == 0,
186+
// otherwise limit the number of iterations.
187+
if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) {
188+
LLVM_DEBUG(
189+
dbgs() << "\nCombiner reached iteration limit after iteration #"
190+
<< Iteration << '\n');
191+
break;
192+
}
193+
}
194+
195+
if (Iteration == 1)
196+
++NumOneIteration;
197+
else if (Iteration == 2)
198+
++NumTwoIterations;
199+
else
200+
++NumThreeOrMoreIterations;
170201

171202
#ifndef NDEBUG
172203
if (CSEInfo) {

llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,10 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
165165
CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
166166
/*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
167167
F.hasOptSize(), F.hasMinSize());
168+
// Disable fixed-point iteration in the Combiner. This improves compile-time
169+
// at the cost of possibly missing optimizations. See PR#94291 for details.
170+
CInfo.MaxIterations = 1;
171+
168172
AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
169173
/*CSEInfo*/ nullptr, RuleConfig, ST);
170174
return Impl.combineMachineInstrs();

llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,26 +28,27 @@ define i32 @foo() {
2828
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
2929
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
3030
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
31+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
3132
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
3233
; CHECK-NEXT: G_BR %bb.2
3334
; CHECK-NEXT: {{ $}}
3435
; CHECK-NEXT: bb.2.if.then:
3536
; CHECK-NEXT: successors: %bb.3(0x80000000)
3637
; CHECK-NEXT: {{ $}}
3738
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
38-
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
39-
; CHECK-NEXT: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2)
40-
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
39+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
40+
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV3]](p0) :: (store (s32) into @var2)
41+
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
4142
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
42-
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
43+
; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
4344
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
44-
; CHECK-NEXT: G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3)
45-
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
45+
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV5]](p0) :: (store (s32) into @var3)
46+
; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
4647
; CHECK-NEXT: G_BR %bb.3
4748
; CHECK-NEXT: {{ $}}
4849
; CHECK-NEXT: bb.3.if.end:
49-
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
50-
; CHECK-NEXT: $w0 = COPY [[C6]](s32)
50+
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
51+
; CHECK-NEXT: $w0 = COPY [[C7]](s32)
5152
; CHECK-NEXT: RET_ReallyLR implicit $w0
5253
entry:
5354
%0 = load i32, ptr @var1, align 4
@@ -84,6 +85,7 @@ define i32 @darwin_tls() {
8485
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
8586
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
8687
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]]
88+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
8789
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
8890
; CHECK-NEXT: G_BR %bb.2
8991
; CHECK-NEXT: {{ $}}
@@ -96,8 +98,8 @@ define i32 @darwin_tls() {
9698
; CHECK-NEXT: G_BR %bb.3
9799
; CHECK-NEXT: {{ $}}
98100
; CHECK-NEXT: bb.3.if.end:
99-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
100-
; CHECK-NEXT: $w0 = COPY [[C2]](s32)
101+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
102+
; CHECK-NEXT: $w0 = COPY [[C3]](s32)
101103
; CHECK-NEXT: RET_ReallyLR implicit $w0
102104
entry:
103105
%0 = load i32, ptr @var1, align 4
@@ -127,6 +129,7 @@ define i32 @imm_cost_too_large_cost_of_2() {
127129
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
128130
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
129131
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
132+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
130133
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
131134
; CHECK-NEXT: G_BR %bb.2
132135
; CHECK-NEXT: {{ $}}
@@ -147,8 +150,8 @@ define i32 @imm_cost_too_large_cost_of_2() {
147150
; CHECK-NEXT: bb.4.if.end:
148151
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
149152
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3)
150-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
151-
; CHECK-NEXT: $w0 = COPY [[C3]](s32)
153+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
154+
; CHECK-NEXT: $w0 = COPY [[C4]](s32)
152155
; CHECK-NEXT: RET_ReallyLR implicit $w0
153156
entry:
154157
%0 = load i32, ptr @var1, align 4
@@ -183,6 +186,7 @@ define i64 @imm_cost_too_large_cost_of_4() {
183186
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
184187
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
185188
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
189+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
186190
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
187191
; CHECK-NEXT: G_BR %bb.2
188192
; CHECK-NEXT: {{ $}}
@@ -203,8 +207,8 @@ define i64 @imm_cost_too_large_cost_of_4() {
203207
; CHECK-NEXT: bb.4.if.end:
204208
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
205209
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
206-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
207-
; CHECK-NEXT: $x0 = COPY [[C3]](s64)
210+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
211+
; CHECK-NEXT: $x0 = COPY [[C4]](s64)
208212
; CHECK-NEXT: RET_ReallyLR implicit $x0
209213
entry:
210214
%0 = load i64, ptr @var1_64, align 4
@@ -239,6 +243,7 @@ define i64 @f64_imm_cost_too_high(double %a) {
239243
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
240244
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
241245
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
246+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
242247
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
243248
; CHECK-NEXT: G_BR %bb.2
244249
; CHECK-NEXT: {{ $}}
@@ -259,8 +264,8 @@ define i64 @f64_imm_cost_too_high(double %a) {
259264
; CHECK-NEXT: bb.4.if.end:
260265
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
261266
; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
262-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
263-
; CHECK-NEXT: $x0 = COPY [[C3]](s64)
267+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
268+
; CHECK-NEXT: $x0 = COPY [[C4]](s64)
264269
; CHECK-NEXT: RET_ReallyLR implicit $x0
265270
entry:
266271
%0 = load i64, ptr @var1_64, align 4
@@ -294,31 +299,32 @@ define i64 @f64_imm_cheap(double %a) {
294299
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
295300
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
296301
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
302+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
297303
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
298304
; CHECK-NEXT: G_BR %bb.2
299305
; CHECK-NEXT: {{ $}}
300306
; CHECK-NEXT: bb.2.if.then:
301307
; CHECK-NEXT: successors: %bb.3(0x80000000)
302308
; CHECK-NEXT: {{ $}}
303309
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
304-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
305-
; CHECK-NEXT: G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
310+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
311+
; CHECK-NEXT: G_STORE [[C4]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
306312
; CHECK-NEXT: G_BR %bb.3
307313
; CHECK-NEXT: {{ $}}
308314
; CHECK-NEXT: bb.3.if.then2:
309315
; CHECK-NEXT: successors: %bb.4(0x80000000)
310316
; CHECK-NEXT: {{ $}}
311-
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
317+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
312318
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
313-
; CHECK-NEXT: G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
319+
; CHECK-NEXT: G_STORE [[C5]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
314320
; CHECK-NEXT: G_BR %bb.4
315321
; CHECK-NEXT: {{ $}}
316322
; CHECK-NEXT: bb.4.if.end:
317323
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
318-
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
319-
; CHECK-NEXT: G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
320-
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
321-
; CHECK-NEXT: $x0 = COPY [[C6]](s64)
324+
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
325+
; CHECK-NEXT: G_STORE [[C6]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
326+
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
327+
; CHECK-NEXT: $x0 = COPY [[C7]](s64)
322328
; CHECK-NEXT: RET_ReallyLR implicit $x0
323329
entry:
324330
%0 = load i64, ptr @var1_64, align 4

0 commit comments

Comments
 (0)