Skip to content

Commit ccdcb98

Browse files
committed
[IRCE] Relax profitability check
IRCE currently has two profitability checks: 1. min number of iterations (10 by default); 2. branch is highly biased (> 15/16). However, it may still be profitable to eliminate range checks even if the branch isn't as biased. Consider, for example, a loop with 100 iterations, where IRCE currently eliminates all 100 range checks. The same range checks, if performed in a loop with 200 iterations, are not eliminated because their branch is now only 1:1. This patch proposes to relax the profitability checks of IRCE. Namely, instead of the two checks currently in place, consider IRCE profitable if the branch probability scaled by the expected number of iterations (i.e., the estimated number of eliminated checks) is over a threshold. This covers the minimum number of iterations check (there are at least as many iterations as eliminated range checks), and changes the bias check from a percent of iterations to at least a constant threshold of eliminated checks. The effect is shown in the new test `profitability.ll`. The loop has 100 iterations (the backedge is taken 99:1). The range check's branch weights are 1:1, so current IRCE wouldn't even consider this a range check. However, with the new implementation, setting the minimum eliminated checks as high as 50, the transformation is still applied. If the number of iterations can't be estimated, the check falls back to the current 15/16 likelihood check.
1 parent 78db4e9 commit ccdcb98

File tree

3 files changed

+92
-46
lines changed

3 files changed

+92
-46
lines changed

llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

Lines changed: 52 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
105105
static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
106106
cl::Hidden, cl::init(false));
107107

108-
static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
109-
cl::Hidden, cl::init(10));
108+
static cl::opt<unsigned> MinEliminatedChecks("irce-min-eliminated-checks",
109+
cl::Hidden, cl::init(10));
110110

111111
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
112112
cl::Hidden, cl::init(true));
@@ -130,15 +130,9 @@ static cl::opt<bool>
130130

131131
namespace {
132132

133-
/// An inductive range check is conditional branch in a loop with
134-
///
135-
/// 1. a very cold successor (i.e. the branch jumps to that successor very
136-
/// rarely)
137-
///
138-
/// and
139-
///
140-
/// 2. a condition that is provably true for some contiguous range of values
141-
/// taken by the containing loop's induction variable.
133+
/// An inductive range check is conditional branch in a loop with a condition
134+
/// that is provably true for some contiguous range of values taken by the
135+
/// containing loop's induction variable.
142136
///
143137
class InductiveRangeCheck {
144138

@@ -233,6 +227,7 @@ class InductiveRangeCheck {
233227
/// checks, and hence don't end up in \p Checks.
234228
static void extractRangeChecksFromBranch(
235229
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
230+
std::optional<uint64_t> EstimatedTripCount,
236231
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
237232
};
238233

@@ -246,9 +241,10 @@ class InductiveRangeCheckElimination {
246241
std::optional<llvm::function_ref<llvm::BlockFrequencyInfo &()>>;
247242
GetBFIFunc GetBFI;
248243

249-
// Returns true if it is profitable to do a transform basing on estimation of
250-
// number of iterations.
251-
bool isProfitableToTransform(const Loop &L);
244+
// Returns the estimated number of iterations based on block frequency info if
245+
// available, or on branch probability info. Nullopt is returned if the number
246+
// of iterations cannot be estimated.
247+
std::optional<uint64_t> estimatedTripCount(const Loop &L);
252248

253249
public:
254250
InductiveRangeCheckElimination(ScalarEvolution &SE,
@@ -522,18 +518,40 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
522518

523519
void InductiveRangeCheck::extractRangeChecksFromBranch(
524520
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
521+
std::optional<uint64_t> EstimatedTripCount,
525522
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
526523
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
527524
return;
528525

529526
unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
530527
assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
531528
"No edges coming to loop?");
532-
BranchProbability LikelyTaken(15, 16);
533529

534-
if (!SkipProfitabilityChecks && BPI &&
535-
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
536-
return;
530+
if (!SkipProfitabilityChecks && BPI) {
531+
auto SuccessProbability =
532+
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc);
533+
if (EstimatedTripCount) {
534+
auto EstimatedEliminatedChecks =
535+
SuccessProbability.scale(*EstimatedTripCount);
536+
if (EstimatedEliminatedChecks < MinEliminatedChecks) {
537+
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
538+
<< *BI << ": "
539+
<< "estimated eliminated checks too low "
540+
<< EstimatedEliminatedChecks << "\n";);
541+
return;
542+
}
543+
} else {
544+
BranchProbability LikelyTaken(15, 16);
545+
if (SuccessProbability < LikelyTaken) {
546+
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
547+
<< *BI << ": "
548+
<< "could not estimate trip count "
549+
<< "and branch success probability too low "
550+
<< SuccessProbability << "\n";);
551+
return;
552+
}
553+
}
554+
}
537555

538556
// IRCE expects branch's true edge comes to loop. Invert branch for opposite
539557
// case.
@@ -938,42 +956,34 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
938956
return getLoopPassPreservedAnalyses();
939957
}
940958

941-
bool InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L) {
942-
if (SkipProfitabilityChecks)
943-
return true;
959+
std::optional<uint64_t>
960+
InductiveRangeCheckElimination::estimatedTripCount(const Loop &L) {
944961
if (GetBFI) {
945962
BlockFrequencyInfo &BFI = (*GetBFI)();
946963
uint64_t hFreq = BFI.getBlockFreq(L.getHeader()).getFrequency();
947964
uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
948-
if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
949-
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
950-
<< "the estimated number of iterations basing on "
951-
"frequency info is " << (hFreq / phFreq) << "\n";);
952-
return false;
953-
}
954-
return true;
965+
if (phFreq == 0 || hFreq == 0)
966+
return std::nullopt;
967+
return {hFreq / phFreq};
955968
}
956969

957970
if (!BPI)
958-
return true;
971+
return std::nullopt;
959972

960973
auto *Latch = L.getLoopLatch();
961974
if (!Latch)
962-
return true;
975+
return std::nullopt;
963976
auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
964977
if (!LatchBr)
965-
return true;
966-
auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
978+
return std::nullopt;
967979

980+
auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
968981
BranchProbability ExitProbability =
969982
BPI->getEdgeProbability(Latch, LatchBrExitIdx);
970-
if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
971-
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
972-
<< "the exit probability is too big " << ExitProbability
973-
<< "\n";);
974-
return false;
975-
}
976-
return true;
983+
if (ExitProbability.isUnknown() || ExitProbability.isZero())
984+
return std::nullopt;
985+
986+
return {ExitProbability.scaleByInverse(1)};
977987
}
978988

979989
bool InductiveRangeCheckElimination::run(
@@ -989,17 +999,15 @@ bool InductiveRangeCheckElimination::run(
989999
return false;
9901000
}
9911001

992-
if (!isProfitableToTransform(*L))
993-
return false;
994-
9951002
LLVMContext &Context = Preheader->getContext();
9961003
SmallVector<InductiveRangeCheck, 16> RangeChecks;
9971004
bool Changed = false;
9981005

1006+
auto EstimatedTripCount = estimatedTripCount(*L);
9991007
for (auto *BBI : L->getBlocks())
10001008
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
1001-
InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
1002-
RangeChecks, Changed);
1009+
InductiveRangeCheck::extractRangeChecksFromBranch(
1010+
TBI, L, SE, BPI, EstimatedTripCount, RangeChecks, Changed);
10031011

10041012
if (RangeChecks.empty())
10051013
return Changed;

llvm/test/Transforms/IRCE/low-iterations.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2-
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
1+
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2+
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
33

44
; CHECK-YES: constrained Loop
55
; CHECK-NO-NOT: constrained Loop
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=51 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2+
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=50 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
3+
4+
; CHECK-YES: constrained Loop
5+
; CHECK-NO-NOT: constrained Loop
6+
7+
declare void @bar(i32)
8+
9+
define i32 @foo(ptr %arr_a, ptr %a_len_ptr, i32 %n) {
10+
entry:
11+
%len.a = load i32, ptr %a_len_ptr, !range !0
12+
%first.itr.check = icmp sgt i32 %n, 0
13+
br i1 %first.itr.check, label %loop, label %exit, !prof !1
14+
15+
loop:
16+
%idx = phi i32 [ 0, %entry ] , [ %idx.next, %backedge ]
17+
%abc.a = icmp slt i32 %idx, %len.a
18+
br i1 %abc.a, label %in.bounds.a, label %backedge, !prof !2
19+
20+
in.bounds.a:
21+
%addr.a = getelementptr i32, ptr %arr_a, i32 %idx
22+
%val = load i32, ptr %addr.a
23+
call void @bar(i32 %val)
24+
br label %backedge
25+
26+
backedge:
27+
%idx.next = add i32 %idx, 1
28+
%next = icmp slt i32 %idx.next, %n
29+
br i1 %next, label %loop, label %exit, !prof !3
30+
31+
exit:
32+
ret i32 0
33+
}
34+
35+
!0 = !{i32 0, i32 2147483647}
36+
!1 = !{!"branch_weights", i32 1024, i32 1}
37+
!2 = !{!"branch_weights", i32 1, i32 1}
38+
!3 = !{!"branch_weights", i32 99, i32 1}

0 commit comments

Comments
 (0)