Skip to content

Commit f16553c

Browse files
committed
[IRCE] Relax profitability check
IRCE currently has two profitability checks: (1) a minimum number of iterations (10 by default), and (2) the branch is highly biased (> 15/16). However, it may still be profitable to eliminate range checks even if the branch isn't as biased. Consider, for example, a loop with 100 iterations, where IRCE currently eliminates all 100 range checks. The same range checks, if performed in a loop with 200 iterations, are not eliminated because their branch is now only 1:1. This patch proposes to relax the profitability checks of IRCE. Namely, instead of the two checks currently in place, consider IRCE profitable if the branch probability scaled by the expected number of iterations (i.e., the estimated number of eliminated checks) is at least a threshold. This covers the minimum number of iterations check (there are at least as many iterations as eliminated range checks), and changes the bias check from a percentage of iterations to a constant minimum number of eliminated checks. The effect is shown in the new test `profitability.ll`. The loop has 100 iterations (the backedge is taken 99:1). The range check's branch weights are 1:1, so current IRCE wouldn't even consider this a range check. However, with the new implementation, even when the minimum number of eliminated checks is set as high as 50, the transformation is still applied. If the number of iterations can't be estimated, the check falls back to the current 15/16 likelihood check.
1 parent e398da2 commit f16553c

File tree

3 files changed

+101
-46
lines changed

3 files changed

+101
-46
lines changed

llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

Lines changed: 61 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
107107
static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
108108
cl::Hidden, cl::init(false));
109109

110-
static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
111-
cl::Hidden, cl::init(10));
110+
static cl::opt<unsigned> MinEliminatedChecks("irce-min-eliminated-checks",
111+
cl::Hidden, cl::init(10));
112112

113113
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
114114
cl::Hidden, cl::init(true));
@@ -132,15 +132,9 @@ static cl::opt<bool>
132132

133133
namespace {
134134

135-
/// An inductive range check is conditional branch in a loop with
136-
///
137-
/// 1. a very cold successor (i.e. the branch jumps to that successor very
138-
/// rarely)
139-
///
140-
/// and
141-
///
142-
/// 2. a condition that is provably true for some contiguous range of values
143-
/// taken by the containing loop's induction variable.
135+
/// An inductive range check is a conditional branch in a loop with a condition
136+
/// that is provably true for some contiguous range of values taken by the
137+
/// containing loop's induction variable.
144138
///
145139
class InductiveRangeCheck {
146140

@@ -235,6 +229,7 @@ class InductiveRangeCheck {
235229
/// checks, and hence don't end up in \p Checks.
236230
static void extractRangeChecksFromBranch(
237231
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
232+
std::optional<uint64_t> EstimatedTripCount,
238233
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
239234
};
240235

@@ -248,9 +243,10 @@ class InductiveRangeCheckElimination {
248243
std::optional<llvm::function_ref<llvm::BlockFrequencyInfo &()>>;
249244
GetBFIFunc GetBFI;
250245

251-
// Returns true if it is profitable to do a transform basing on estimation of
252-
// number of iterations.
253-
bool isProfitableToTransform(const Loop &L, LoopStructure &LS);
246+
// Returns the estimated number of iterations based on block frequency info if
247+
// available, or on branch probability info. Nullopt is returned if the number
248+
// of iterations cannot be estimated.
249+
std::optional<uint64_t> estimatedTripCount(const Loop &L);
254250

255251
public:
256252
InductiveRangeCheckElimination(ScalarEvolution &SE,
@@ -524,18 +520,40 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
524520

525521
void InductiveRangeCheck::extractRangeChecksFromBranch(
526522
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
523+
std::optional<uint64_t> EstimatedTripCount,
527524
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
528525
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
529526
return;
530527

531528
unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
532529
assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
533530
"No edges coming to loop?");
534-
BranchProbability LikelyTaken(15, 16);
535531

536-
if (!SkipProfitabilityChecks && BPI &&
537-
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
538-
return;
532+
if (!SkipProfitabilityChecks && BPI) {
533+
auto SuccessProbability =
534+
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc);
535+
if (EstimatedTripCount) {
536+
auto EstimatedEliminatedChecks =
537+
SuccessProbability.scale(*EstimatedTripCount);
538+
if (EstimatedEliminatedChecks < MinEliminatedChecks) {
539+
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
540+
<< *BI << ": "
541+
<< "estimated eliminated checks too low "
542+
<< EstimatedEliminatedChecks << "\n";);
543+
return;
544+
}
545+
} else {
546+
BranchProbability LikelyTaken(15, 16);
547+
if (SuccessProbability < LikelyTaken) {
548+
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
549+
<< *BI << ": "
550+
<< "could not estimate trip count "
551+
<< "and branch success probability too low "
552+
<< SuccessProbability << "\n";);
553+
return;
554+
}
555+
}
556+
}
539557

540558
// IRCE expects branch's true edge comes to loop. Invert branch for opposite
541559
// case.
@@ -940,35 +958,35 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
940958
return getLoopPassPreservedAnalyses();
941959
}
942960

943-
bool
944-
InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
945-
LoopStructure &LS) {
946-
if (SkipProfitabilityChecks)
947-
return true;
961+
std::optional<uint64_t>
962+
InductiveRangeCheckElimination::estimatedTripCount(const Loop &L) {
948963
if (GetBFI) {
949964
BlockFrequencyInfo &BFI = (*GetBFI)();
950-
uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
965+
uint64_t hFreq = BFI.getBlockFreq(L.getHeader()).getFrequency();
951966
uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
952-
if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
953-
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
954-
<< "the estimated number of iterations basing on "
955-
"frequency info is " << (hFreq / phFreq) << "\n";);
956-
return false;
957-
}
958-
return true;
967+
if (phFreq == 0 || hFreq == 0)
968+
return std::nullopt;
969+
return {hFreq / phFreq};
959970
}
960971

961972
if (!BPI)
962-
return true;
973+
return std::nullopt;
974+
975+
auto *Latch = L.getLoopLatch();
976+
if (!Latch)
977+
return std::nullopt;
978+
979+
auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
980+
if (!LatchBr)
981+
return std::nullopt;
982+
983+
auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
963984
BranchProbability ExitProbability =
964-
BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);
965-
if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
966-
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
967-
<< "the exit probability is too big " << ExitProbability
968-
<< "\n";);
969-
return false;
970-
}
971-
return true;
985+
BPI->getEdgeProbability(Latch, LatchBrExitIdx);
986+
if (ExitProbability.isUnknown() || ExitProbability.isZero())
987+
return std::nullopt;
988+
989+
return {ExitProbability.scaleByInverse(1)};
972990
}
973991

974992
bool InductiveRangeCheckElimination::run(
@@ -988,10 +1006,11 @@ bool InductiveRangeCheckElimination::run(
9881006
SmallVector<InductiveRangeCheck, 16> RangeChecks;
9891007
bool Changed = false;
9901008

1009+
auto EstimatedTripCount = estimatedTripCount(*L);
9911010
for (auto *BBI : L->getBlocks())
9921011
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
993-
InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
994-
RangeChecks, Changed);
1012+
InductiveRangeCheck::extractRangeChecksFromBranch(
1013+
TBI, L, SE, BPI, EstimatedTripCount, RangeChecks, Changed);
9951014

9961015
if (RangeChecks.empty())
9971016
return Changed;
@@ -1019,8 +1038,6 @@ bool InductiveRangeCheckElimination::run(
10191038
return Changed;
10201039
}
10211040
LoopStructure LS = *MaybeLoopStructure;
1022-
if (!isProfitableToTransform(*L, LS))
1023-
return Changed;
10241041
const SCEVAddRecExpr *IndVar =
10251042
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
10261043

llvm/test/Transforms/IRCE/low-iterations.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2-
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
1+
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2+
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
33

44
; CHECK-YES: constrained Loop
55
; CHECK-NO-NOT: constrained Loop
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=51 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2+
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=50 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
3+
4+
; CHECK-YES: constrained Loop
5+
; CHECK-NO-NOT: constrained Loop
6+
7+
declare void @bar(i32)
8+
9+
define i32 @foo(ptr %arr_a, ptr %a_len_ptr, i32 %n) {
10+
entry:
11+
%len.a = load i32, ptr %a_len_ptr, !range !0
12+
%first.itr.check = icmp sgt i32 %n, 0
13+
br i1 %first.itr.check, label %loop, label %exit, !prof !1
14+
15+
loop:
16+
%idx = phi i32 [ 0, %entry ] , [ %idx.next, %backedge ]
17+
%abc.a = icmp slt i32 %idx, %len.a
18+
br i1 %abc.a, label %in.bounds.a, label %backedge, !prof !2
19+
20+
in.bounds.a:
21+
%addr.a = getelementptr i32, ptr %arr_a, i32 %idx
22+
%val = load i32, ptr %addr.a
23+
call void @bar(i32 %val)
24+
br label %backedge
25+
26+
backedge:
27+
%idx.next = add i32 %idx, 1
28+
%next = icmp slt i32 %idx.next, %n
29+
br i1 %next, label %loop, label %exit, !prof !3
30+
31+
exit:
32+
ret i32 0
33+
}
34+
35+
!0 = !{i32 0, i32 2147483647}
36+
!1 = !{!"branch_weights", i32 1024, i32 1}
37+
!2 = !{!"branch_weights", i32 1, i32 1}
38+
!3 = !{!"branch_weights", i32 99, i32 1}

0 commit comments

Comments
 (0)