Skip to content

[IRCE] Relax profitability check #104659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 58 additions & 42 deletions llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
cl::Hidden, cl::init(false));

static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
cl::Hidden, cl::init(10));
static cl::opt<unsigned> MinEliminatedChecks("irce-min-eliminated-checks",
cl::Hidden, cl::init(10));

static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
cl::Hidden, cl::init(true));
Expand All @@ -130,15 +130,9 @@ static cl::opt<bool>

namespace {

/// An inductive range check is conditional branch in a loop with
///
/// 1. a very cold successor (i.e. the branch jumps to that successor very
/// rarely)
///
/// and
///
/// 2. a condition that is provably true for some contiguous range of values
/// taken by the containing loop's induction variable.
/// An inductive range check is a conditional branch in a loop with a
/// condition that is provably true for some contiguous range of values taken
/// by the containing loop's induction variable.
///
class InductiveRangeCheck {

Expand Down Expand Up @@ -233,6 +227,7 @@ class InductiveRangeCheck {
/// checks, and hence don't end up in \p Checks.
static void extractRangeChecksFromBranch(
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
std::optional<uint64_t> EstimatedTripCount,
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
};

Expand All @@ -246,9 +241,10 @@ class InductiveRangeCheckElimination {
std::optional<llvm::function_ref<llvm::BlockFrequencyInfo &()>>;
GetBFIFunc GetBFI;

// Returns true if it is profitable to do a transform basing on estimation of
// number of iterations.
bool isProfitableToTransform(const Loop &L);
// Returns the estimated number of iterations based on block frequency info if
// available, or on branch probability info. Nullopt is returned if the number
// of iterations cannot be estimated.
std::optional<uint64_t> estimatedTripCount(const Loop &L);

public:
InductiveRangeCheckElimination(ScalarEvolution &SE,
Expand Down Expand Up @@ -522,18 +518,40 @@ void InductiveRangeCheck::extractRangeChecksFromCond(

void InductiveRangeCheck::extractRangeChecksFromBranch(
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
std::optional<uint64_t> EstimatedTripCount,
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
return;

unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
"No edges coming to loop?");
BranchProbability LikelyTaken(15, 16);

if (!SkipProfitabilityChecks && BPI &&
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
return;
if (!SkipProfitabilityChecks && BPI) {
auto SuccessProbability =
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc);
if (EstimatedTripCount) {
auto EstimatedEliminatedChecks =
SuccessProbability.scale(*EstimatedTripCount);
if (EstimatedEliminatedChecks < MinEliminatedChecks) {
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
<< *BI << ": "
<< "estimated eliminated checks too low "
<< EstimatedEliminatedChecks << "\n";);
return;
}
} else {
BranchProbability LikelyTaken(15, 16);
if (SuccessProbability < LikelyTaken) {
LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
<< *BI << ": "
<< "could not estimate trip count "
<< "and branch success probability too low "
<< SuccessProbability << "\n";);
return;
}
}
}

// IRCE expects the branch's true edge to lead into the loop. Invert the
// branch in the opposite case.
Expand Down Expand Up @@ -938,42 +956,34 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
return getLoopPassPreservedAnalyses();
}

bool InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L) {
if (SkipProfitabilityChecks)
return true;
std::optional<uint64_t>
InductiveRangeCheckElimination::estimatedTripCount(const Loop &L) {
if (GetBFI) {
BlockFrequencyInfo &BFI = (*GetBFI)();
uint64_t hFreq = BFI.getBlockFreq(L.getHeader()).getFrequency();
uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
<< "the estimated number of iterations basing on "
"frequency info is " << (hFreq / phFreq) << "\n";);
return false;
}
return true;
if (phFreq == 0 || hFreq == 0)
return std::nullopt;
return {hFreq / phFreq};
}

if (!BPI)
return true;
return std::nullopt;

auto *Latch = L.getLoopLatch();
if (!Latch)
return true;
return std::nullopt;
auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
if (!LatchBr)
return true;
auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
return std::nullopt;

auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
BranchProbability ExitProbability =
BPI->getEdgeProbability(Latch, LatchBrExitIdx);
if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
<< "the exit probability is too big " << ExitProbability
<< "\n";);
return false;
}
return true;
if (ExitProbability.isUnknown() || ExitProbability.isZero())
return std::nullopt;

return {ExitProbability.scaleByInverse(1)};
}

bool InductiveRangeCheckElimination::run(
Expand All @@ -989,17 +999,23 @@ bool InductiveRangeCheckElimination::run(
return false;
}

if (!isProfitableToTransform(*L))
auto EstimatedTripCount = estimatedTripCount(*L);
if (!SkipProfitabilityChecks && EstimatedTripCount &&
*EstimatedTripCount < MinEliminatedChecks) {
LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
<< "the estimated number of iterations is "
<< *EstimatedTripCount << "\n");
return false;
}

LLVMContext &Context = Preheader->getContext();
SmallVector<InductiveRangeCheck, 16> RangeChecks;
bool Changed = false;

for (auto *BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
RangeChecks, Changed);
InductiveRangeCheck::extractRangeChecksFromBranch(
TBI, L, SE, BPI, EstimatedTripCount, RangeChecks, Changed);

if (RangeChecks.empty())
return Changed;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/IRCE/low-iterations.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES

; CHECK-YES: constrained Loop
; CHECK-NO-NOT: constrained Loop
Expand Down
38 changes: 38 additions & 0 deletions llvm/test/Transforms/IRCE/profitability.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=51 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=50 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES

; CHECK-YES: constrained Loop
; CHECK-NO-NOT: constrained Loop

declare void @bar(i32)

; Profitability test for a single range check inside a counted loop.
;
; The loop walks %idx from 0 up to %n and, on each iteration, checks
; %idx < %len.a before loading from %arr_a. The !prof annotations give the
; range check 1:1 weights and the latch 99:1 weights in favour of the backedge.
; NOTE(review): presumably this yields an estimated trip count of about 100
; and therefore about 50 eliminated checks, which would explain why the RUN
; lines use thresholds of 50 (transform expected) and 51 (no transform) --
; confirm against the pass's trip-count estimation.
define i32 @foo(ptr %arr_a, ptr %a_len_ptr, i32 %n) {
entry:
; The array length is loaded once before the loop; !range !0 marks it as
; non-negative.
%len.a = load i32, ptr %a_len_ptr, !range !0
; Guard: only enter the loop when %n > 0.
%first.itr.check = icmp sgt i32 %n, 0
br i1 %first.itr.check, label %loop, label %exit, !prof !1

loop:
%idx = phi i32 [ 0, %entry ] , [ %idx.next, %backedge ]
; The range check under test: %idx < %len.a. The failing edge goes straight to
; the backedge instead of leaving the loop.
%abc.a = icmp slt i32 %idx, %len.a
br i1 %abc.a, label %in.bounds.a, label %backedge, !prof !2

in.bounds.a:
; In-bounds path: load %arr_a[%idx] and pass the value to @bar.
%addr.a = getelementptr i32, ptr %arr_a, i32 %idx
%val = load i32, ptr %addr.a
call void @bar(i32 %val)
br label %backedge

backedge:
; Latch: advance the induction variable and continue while %idx.next < %n.
%idx.next = add i32 %idx, 1
%next = icmp slt i32 %idx.next, %n
br i1 %next, label %loop, label %exit, !prof !3

exit:
ret i32 0
}

; !0: value range attached to the %len.a load (lower bound 0).
!0 = !{i32 0, i32 2147483647}
; !1: entry guard, weighted 1024:1 towards entering the loop.
!1 = !{!"branch_weights", i32 1024, i32 1}
; !2: range-check branch, equal weights for the in-bounds edge and the
; edge that skips to %backedge.
!2 = !{!"branch_weights", i32 1, i32 1}
; !3: latch branch, weighted 99:1 towards the backedge over the loop exit.
!3 = !{!"branch_weights", i32 99, i32 1}
Loading