Skip to content

[IndVars] Fix high-cost-expand check in LFTR #125828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 52 additions & 59 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,9 @@ class IndVarSimplify {
bool rewriteFirstIterationLoopExitValues(Loop *L);

bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter);
const SCEV *ExitCount, PHINode *IndVar,
Instruction *IncVar, const SCEV *IVLimit,
bool UsePostInc, SCEVExpander &Rewriter);

bool sinkUnusedInvariants(Loop *L);

Expand Down Expand Up @@ -901,73 +902,38 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
const SCEV *ExitCount, bool UsePostInc, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
assert(isLoopCounter(IndVar, L, SE));
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

static const SCEV *getIVLimit(PHINode *IndVar, const SCEV *ExitCount,
bool UsePostInc, ScalarEvolution *SE) {
// For integer IVs, truncate the IV before computing the limit unless we
// know a priori that the limit must be a constant when evaluated in the
// bitwidth of the IV. We prefer (potentially) keeping a truncate of the
// IV in the loop over a (potentially) expensive expansion of the widened
// exit count add(zext(add)) expression.
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
if (IndVar->getType()->isIntegerTy() &&
SE->getTypeSizeInBits(AR->getType()) >
SE->getTypeSizeInBits(ExitCount->getType())) {
SE->getTypeSizeInBits(ExitCount->getType())) {
const SCEV *IVInit = AR->getStart();
if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
}

const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
ExitingBB->getTerminator());
AR = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
return AR->evaluateAtIteration(ExitCount, *SE);
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter) {
assert(L->getLoopLatch() && "Loop no longer in simplified form?");
bool IndVarSimplify::linearFunctionTestReplace(
Loop *L, BasicBlock *ExitingBB, const SCEV *ExitCount, PHINode *IndVar,
Instruction *IncVar, const SCEV *IVLimit, bool UsePostInc,
SCEVExpander &Rewriter) {
assert(isLoopCounter(IndVar, L, SE));
Instruction * const IncVar =
cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

// Initialize CmpIndVar to the preincremented IV.
Value *CmpIndVar = IndVar;
bool UsePostInc = false;

// If the exiting block is the same as the backedge block, we prefer to
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
if (ExitingBB == L->getLoopLatch()) {
// For pointer IVs, we chose not to strip inbounds, which requires us not
// to add a potentially UB-introducing use. We need to either a) show
// the loop test we're modifying is already in post-inc form, or b) show
// that adding a use must not introduce UB.
bool SafeToPostInc =
IndVar->getType()->isIntegerTy() ||
isLoopExitTestBasedOn(IncVar, ExitingBB) ||
mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
if (SafeToPostInc) {
UsePostInc = true;
CmpIndVar = IncVar;
}
}

Value *CmpIndVar = UsePostInc ? IncVar : IndVar;

// It may be necessary to drop nowrap flags on the incrementing instruction
// if either LFTR moves from a pre-inc check to a post-inc check (in which
Expand All @@ -989,8 +955,11 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
}

Value *ExitCnt = genLoopLimit(
IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, IVLimit->getType(),
ExitingBB->getTerminator());
assert(ExitCnt->getType()->isPointerTy() ==
IndVar->getType()->isPointerTy() &&
"genLoopLimit missed a cast");
Expand Down Expand Up @@ -1950,8 +1919,6 @@ bool IndVarSimplify::run(Loop *L) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it.
if (!DisableLFTR) {
BasicBlock *PreHeader = L->getLoopPreheader();

SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (BasicBlock *ExitingBB : ExitingBlocks) {
Expand Down Expand Up @@ -1983,18 +1950,44 @@ bool IndVarSimplify::run(Loop *L) {
if (!IndVar)
continue;

assert(L->getLoopLatch() && "Loop no longer in simplified form?");

Instruction *IncVar = cast<Instruction>(
IndVar->getIncomingValueForBlock(L->getLoopLatch()));

// For pointer IVs, we chose not to strip inbounds, which requires us not
// to add a potentially UB-introducing use. We need to either a) show
// the loop test we're modifying is already in post-inc form, or b) show
// that adding a use must not introduce UB.
bool SafeToPostInc =
IndVar->getType()->isIntegerTy() ||
isLoopExitTestBasedOn(IncVar, ExitingBB) ||
mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);

// If the exiting block is the same as the backedge block, we prefer to
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
bool UsePostInc = ExitingBB == L->getLoopLatch() && SafeToPostInc;

// IVLimit is the expression that will get expanded later.
const SCEV *IVLimit = getIVLimit(IndVar, ExitCount, UsePostInc, SE);

// Avoid high cost expansions. Note: This heuristic is questionable in
// that our definition of "high cost" is not exactly principled.
// FIXME: ExitCount is not the expression actually being expanded, but we
// check it against high-cost expansions anyway to avoid regressions.
if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
TTI, PreHeader->getTerminator()))
TTI, ExitingBB->getTerminator()) ||
Rewriter.isHighCostExpansion(IVLimit, L, SCEVCheapExpansionBudget,
TTI, ExitingBB->getTerminator()))
continue;

if (!Rewriter.isSafeToExpand(ExitCount))
if (!Rewriter.isSafeToExpand(IVLimit))
continue;

Changed |= linearFunctionTestReplace(L, ExitingBB,
ExitCount, IndVar,
Rewriter);
Changed |=
linearFunctionTestReplace(L, ExitingBB, ExitCount, IndVar, IncVar,
IVLimit, UsePostInc, Rewriter);
}
}
// Clear the rewriter cache, because values that are in the rewriter's cache
Expand Down
9 changes: 3 additions & 6 deletions llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -158,17 +158,14 @@ define i8 @testnullptrint(ptr %buf, ptr %end) nounwind {
; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
; PTR64: preheader:
; PTR64-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1
; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; PTR64-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP4]]
; PTR64-NEXT: br label [[LOOP:%.*]]
; PTR64: loop:
; PTR64-NEXT: [[P_01_US_US:%.*]] = phi ptr [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
; PTR64-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ]
; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[P_01_US_US]], i64 1
; PTR64-NEXT: [[SNEXT:%.*]] = load i8, ptr [[GEP]], align 1
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[GEP]], [[SCEVGEP]]
; PTR64-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ult i32 [[IVNEXT]], [[CNT]]
; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; PTR64: exit.loopexit:
; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,15 @@ end:
define void @test_ptr(i32 %start) {
; CHECK-LABEL: @test_ptr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[START:%.*]] to i3
; CHECK-NEXT: [[TMP1:%.*]] = sub i3 -1, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr @data, i64 [[TMP3]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY:%.*]] ], [ [[P_INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY]] ], [ [[P_INC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[I_INC]] = add nuw i32 [[I]], 1
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i8, ptr [[P]], i64 1
; CHECK-NEXT: store volatile i8 0, ptr [[P_INC]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq ptr [[P_INC]], [[UGLYGEP]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[I_INC]], 7
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[LOOP]]
; CHECK: end:
; CHECK-NEXT: ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,27 @@ define void @hoge(i64 %x, i64 %idx.start, ptr %ptr) {
; CHECK-LABEL: @hoge(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N:%.*]] = sdiv exact i64 [[X:%.*]], 40
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[IDX_START:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[N]]
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ [[TMP1]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH]] ], [ [[IDX_START]], [[ENTRY]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH:%.*]] ], [ [[IDX_START:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[N]], [[IDX]]
; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[INNER_PREHEADER:%.*]]
; CHECK: inner.preheader:
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[INNER]] ], [ 0, [[INNER_PREHEADER]] ]
; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNER]] ], [ [[N]], [[INNER_PREHEADER]] ]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], 1
; CHECK-NEXT: store i64 0, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[I_NEXT]], [[INDVARS_IV]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i64 [[J]], [[IDX]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[INNER_EXIT:%.*]]
; CHECK: inner_exit:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[I_NEXT]], [[INNER]] ]
; CHECK-NEXT: [[INDVAR_USE:%.*]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[IDX_NEXT]] = add nsw i64 [[IDX]], -1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br label [[HEADER]]
; CHECK: end:
; CHECK-NEXT: ret void
Expand Down
13 changes: 5 additions & 8 deletions llvm/test/Transforms/PhaseOrdering/runtime-check-removal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,14 @@ define void @test_remove_check_with_incrementing_integer_induction(i16 %start, i
; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[LEN_N]], 0
; CHECK-NEXT: [[OR_COND3:%.*]] = and i1 [[LEN_NEG_NOT]], [[C1]]
; CHECK-NEXT: br i1 [[OR_COND3]], label [[LOOP_LATCH_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.latch.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[A]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i16 [[LEN]], -1
; CHECK-NEXT: [[UMIN:%.*]] = tail call i16 @llvm.umin.i16(i16 [[TMP0]], i16 [[TMP1]])
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[IV_NEXT:%.*]], [[LOOP_LATCH]] ], [ 0, [[LOOP_LATCH_PREHEADER]] ]
; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[IV_NEXT:%.*]], [[LOOP_LATCH_PREHEADER]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: tail call void @use(i16 [[IV2]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV2]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[IV2]], [[UMIN]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]]
; CHECK-NEXT: [[C:%.*]] = icmp ne i16 [[IV_NEXT]], [[LEN]]
; CHECK-NEXT: [[T_2:%.*]] = icmp ult i16 [[IV_NEXT]], [[A]]
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C]], [[T_2]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_LATCH_PREHEADER]], label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
Expand Down
Loading