Skip to content

Commit 60d9e6f

Browse files
authored
[IRCE] Relax profitability check (#104659)
IRCE currently has two profitability checks: (1) a minimum number of iterations (10 by default), and (2) the branch is highly biased (> 15/16). However, it may still be profitable to eliminate range checks even if the branch isn't as biased. Consider, for example, a loop with 100 iterations, where IRCE currently eliminates all 100 range checks. The same range checks performed over a loop with 200 iterations aren't eliminated because the branch is 50-50. This patch proposes to relax the profitability checks of IRCE. Namely, instead of the two checks currently in place, consider IRCE profitable if the branch probability scaled by the expected number of iterations (i.e., the estimated number of eliminated checks) is over a threshold. This covers the minimum number of iterations check (there are at least as many iterations as eliminated range checks), and changes the bias check from a percent of iterations to at least a constant threshold of eliminated checks. If the number of iterations can't be estimated, the check falls back to the current 15/16 likelihood check.
1 parent 9c319d5 commit 60d9e6f

File tree

3 files changed

+98
-44
lines changed

3 files changed

+98
-44
lines changed

llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

Lines changed: 58 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
105105
static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
106106
cl::Hidden, cl::init(false));
107107

108-
static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
109-
cl::Hidden, cl::init(10));
108+
static cl::opt<unsigned> MinEliminatedChecks("irce-min-eliminated-checks",
109+
cl::Hidden, cl::init(10));
110110

111111
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
112112
cl::Hidden, cl::init(true));
@@ -130,15 +130,9 @@ static cl::opt<bool>
130130

131131
namespace {
132132

133-
/// An inductive range check is conditional branch in a loop with
134-
///
135-
/// 1. a very cold successor (i.e. the branch jumps to that successor very
136-
/// rarely)
137-
///
138-
/// and
139-
///
140-
/// 2. a condition that is provably true for some contiguous range of values
141-
/// taken by the containing loop's induction variable.
133+
/// An inductive range check is conditional branch in a loop with a condition
134+
/// that is provably true for some contiguous range of values taken by the
135+
/// containing loop's induction variable.
142136
///
143137
class InductiveRangeCheck {
144138

@@ -233,6 +227,7 @@ class InductiveRangeCheck {
233227
/// checks, and hence don't end up in \p Checks.
234228
static void extractRangeChecksFromBranch(
235229
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
230+
std::optional<uint64_t> EstimatedTripCount,
236231
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
237232
};
238233

@@ -246,9 +241,10 @@ class InductiveRangeCheckElimination {
246241
std::optional<llvm::function_ref<llvm::BlockFrequencyInfo &()>>;
247242
GetBFIFunc GetBFI;
248243

249-
// Returns true if it is profitable to do a transform basing on estimation of
250-
// number of iterations.
251-
bool isProfitableToTransform(const Loop &L);
244+
// Returns the estimated number of iterations based on block frequency info if
245+
// available, or on branch probability info. Nullopt is returned if the number
246+
// of iterations cannot be estimated.
247+
std::optional<uint64_t> estimatedTripCount(const Loop &L);
252248

253249
public:
254250
InductiveRangeCheckElimination(ScalarEvolution &SE,
@@ -522,18 +518,40 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
522518

523519
void InductiveRangeCheck::extractRangeChecksFromBranch(
524520
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
521+
std::optional<uint64_t> EstimatedTripCount,
525522
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
526523
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
527524
return;
528525

529526
unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
530527
assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
531528
"No edges coming to loop?");
532-
BranchProbability LikelyTaken(15, 16);
533529

534-
if (!SkipProfitabilityChecks && BPI &&
535-
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
536-
return;
530+
if (!SkipProfitabilityChecks && BPI) {
531+
auto SuccessProbability =
532+
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc);
533+
if (EstimatedTripCount) {
534+
auto EstimatedEliminatedChecks =
535+
SuccessProbability.scale(*EstimatedTripCount);
536+
if (EstimatedEliminatedChecks < MinEliminatedChecks) {
537+
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
538+
<< *BI << ": "
539+
<< "estimated eliminated checks too low "
540+
<< EstimatedEliminatedChecks << "\n";);
541+
return;
542+
}
543+
} else {
544+
BranchProbability LikelyTaken(15, 16);
545+
if (SuccessProbability < LikelyTaken) {
546+
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
547+
<< *BI << ": "
548+
<< "could not estimate trip count "
549+
<< "and branch success probability too low "
550+
<< SuccessProbability << "\n";);
551+
return;
552+
}
553+
}
554+
}
537555

538556
// IRCE expects branch's true edge comes to loop. Invert branch for opposite
539557
// case.
@@ -938,42 +956,34 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
938956
return getLoopPassPreservedAnalyses();
939957
}
940958

941-
bool InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L) {
942-
if (SkipProfitabilityChecks)
943-
return true;
959+
std::optional<uint64_t>
960+
InductiveRangeCheckElimination::estimatedTripCount(const Loop &L) {
944961
if (GetBFI) {
945962
BlockFrequencyInfo &BFI = (*GetBFI)();
946963
uint64_t hFreq = BFI.getBlockFreq(L.getHeader()).getFrequency();
947964
uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
948-
if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
949-
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
950-
<< "the estimated number of iterations basing on "
951-
"frequency info is " << (hFreq / phFreq) << "\n";);
952-
return false;
953-
}
954-
return true;
965+
if (phFreq == 0 || hFreq == 0)
966+
return std::nullopt;
967+
return {hFreq / phFreq};
955968
}
956969

957970
if (!BPI)
958-
return true;
971+
return std::nullopt;
959972

960973
auto *Latch = L.getLoopLatch();
961974
if (!Latch)
962-
return true;
975+
return std::nullopt;
963976
auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
964977
if (!LatchBr)
965-
return true;
966-
auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
978+
return std::nullopt;
967979

980+
auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
968981
BranchProbability ExitProbability =
969982
BPI->getEdgeProbability(Latch, LatchBrExitIdx);
970-
if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
971-
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
972-
<< "the exit probability is too big " << ExitProbability
973-
<< "\n";);
974-
return false;
975-
}
976-
return true;
983+
if (ExitProbability.isUnknown() || ExitProbability.isZero())
984+
return std::nullopt;
985+
986+
return {ExitProbability.scaleByInverse(1)};
977987
}
978988

979989
bool InductiveRangeCheckElimination::run(
@@ -989,17 +999,23 @@ bool InductiveRangeCheckElimination::run(
989999
return false;
9901000
}
9911001

992-
if (!isProfitableToTransform(*L))
1002+
auto EstimatedTripCount = estimatedTripCount(*L);
1003+
if (!SkipProfitabilityChecks && EstimatedTripCount &&
1004+
*EstimatedTripCount < MinEliminatedChecks) {
1005+
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
1006+
<< "the estimated number of iterations is "
1007+
<< *EstimatedTripCount << "\n");
9931008
return false;
1009+
}
9941010

9951011
LLVMContext &Context = Preheader->getContext();
9961012
SmallVector<InductiveRangeCheck, 16> RangeChecks;
9971013
bool Changed = false;
9981014

9991015
for (auto *BBI : L->getBlocks())
10001016
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
1001-
InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
1002-
RangeChecks, Changed);
1017+
InductiveRangeCheck::extractRangeChecksFromBranch(
1018+
TBI, L, SE, BPI, EstimatedTripCount, RangeChecks, Changed);
10031019

10041020
if (RangeChecks.empty())
10051021
return Changed;

llvm/test/Transforms/IRCE/low-iterations.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2-
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
1+
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2+
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
33

44
; CHECK-YES: constrained Loop
55
; CHECK-NO-NOT: constrained Loop
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=51 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
2+
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=50 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
3+
4+
; CHECK-YES: constrained Loop
5+
; CHECK-NO-NOT: constrained Loop
6+
7+
declare void @bar(i32)
8+
9+
define i32 @foo(ptr %arr_a, ptr %a_len_ptr, i32 %n) {
10+
entry:
11+
%len.a = load i32, ptr %a_len_ptr, !range !0
12+
%first.itr.check = icmp sgt i32 %n, 0
13+
br i1 %first.itr.check, label %loop, label %exit, !prof !1
14+
15+
loop:
16+
%idx = phi i32 [ 0, %entry ] , [ %idx.next, %backedge ]
17+
%abc.a = icmp slt i32 %idx, %len.a
18+
br i1 %abc.a, label %in.bounds.a, label %backedge, !prof !2
19+
20+
in.bounds.a:
21+
%addr.a = getelementptr i32, ptr %arr_a, i32 %idx
22+
%val = load i32, ptr %addr.a
23+
call void @bar(i32 %val)
24+
br label %backedge
25+
26+
backedge:
27+
%idx.next = add i32 %idx, 1
28+
%next = icmp slt i32 %idx.next, %n
29+
br i1 %next, label %loop, label %exit, !prof !3
30+
31+
exit:
32+
ret i32 0
33+
}
34+
35+
!0 = !{i32 0, i32 2147483647}
36+
!1 = !{!"branch_weights", i32 1024, i32 1}
37+
!2 = !{!"branch_weights", i32 1, i32 1}
38+
!3 = !{!"branch_weights", i32 99, i32 1}

0 commit comments

Comments
 (0)