Skip to content

[GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable fixed-point iteration #94291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ struct CombinerInfo {
bool EnableOptSize;
/// Whether we're optimizing for minsize (-Oz).
bool EnableMinSize;

/// The maximum number of times the Combiner will iterate over the
/// MachineFunction. Setting this to 0 enables fixed-point iteration.
unsigned MaxIterations = 0;
};
} // namespace llvm

Expand Down
35 changes: 33 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/Combiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
Expand All @@ -27,6 +28,11 @@

using namespace llvm;

STATISTIC(NumOneIteration, "Number of functions with one iteration");
STATISTIC(NumTwoIterations, "Number of functions with two iterations");
STATISTIC(NumThreeOrMoreIterations,
"Number of functions with three or more iterations");

namespace llvm {
cl::OptionCategory GICombinerOptionCategory(
"GlobalISel Combiner",
Expand Down Expand Up @@ -135,7 +141,11 @@ bool Combiner::combineMachineInstrs() {
bool MFChanged = false;
bool Changed;

do {
unsigned Iteration = 0;
while (true) {
++Iteration;
LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');

WorkList.clear();

// Collect all instructions. Do a post order traversal for basic blocks and
Expand Down Expand Up @@ -166,7 +176,28 @@ bool Combiner::combineMachineInstrs() {
WLObserver->reportFullyCreatedInstrs();
}
MFChanged |= Changed;
} while (Changed);

if (!Changed) {
LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #"
<< Iteration << '\n');
break;
}
// Iterate until a fixed-point is reached if MaxIterations == 0,
// otherwise limit the number of iterations.
if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) {
LLVM_DEBUG(
dbgs() << "\nCombiner reached iteration limit after iteration #"
<< Iteration << '\n');
break;
}
}

if (Iteration == 1)
++NumOneIteration;
else if (Iteration == 2)
++NumTwoIterations;
else
++NumThreeOrMoreIterations;

#ifndef NDEBUG
if (CSEInfo) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,10 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
F.hasOptSize(), F.hasMinSize());
// Disable fixed-point iteration in the Combiner. This improves compile-time
// at the cost of possibly missing optimizations. See PR#94291 for details.
CInfo.MaxIterations = 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment, perhaps calling back to this PR # so readers will know why we set it to 1.


AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
/*CSEInfo*/ nullptr, RuleConfig, ST);
return Impl.combineMachineInstrs();
Expand Down
54 changes: 30 additions & 24 deletions llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,27 @@ define i32 @foo() {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.if.then:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV3]](p0) :: (store (s32) into @var2)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
; CHECK-NEXT: G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3)
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: G_STORE [[C5]](s32), [[GV5]](p0) :: (store (s32) into @var3)
; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C6]](s32)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C7]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
Expand Down Expand Up @@ -84,6 +85,7 @@ define i32 @darwin_tls() {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
Expand All @@ -96,8 +98,8 @@ define i32 @darwin_tls() {
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C2]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C3]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
Expand Down Expand Up @@ -127,6 +129,7 @@ define i32 @imm_cost_too_large_cost_of_2() {
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
Expand All @@ -147,8 +150,8 @@ define i32 @imm_cost_too_large_cost_of_2() {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C3]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C4]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
Expand Down Expand Up @@ -183,6 +186,7 @@ define i64 @imm_cost_too_large_cost_of_4() {
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
Expand All @@ -203,8 +207,8 @@ define i64 @imm_cost_too_large_cost_of_4() {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: $x0 = COPY [[C3]](s64)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: $x0 = COPY [[C4]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
Expand Down Expand Up @@ -239,6 +243,7 @@ define i64 @f64_imm_cost_too_high(double %a) {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
Expand All @@ -259,8 +264,8 @@ define i64 @f64_imm_cost_too_high(double %a) {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: $x0 = COPY [[C3]](s64)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: $x0 = COPY [[C4]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
Expand Down Expand Up @@ -294,31 +299,32 @@ define i64 @f64_imm_cheap(double %a) {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.if.then:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: G_STORE [[C4]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.then2:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
; CHECK-NEXT: G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
; CHECK-NEXT: G_STORE [[C5]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
; CHECK-NEXT: G_BR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: $x0 = COPY [[C6]](s64)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: G_STORE [[C6]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: $x0 = COPY [[C7]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
Expand Down
Loading