Skip to content

Commit 6d428b6

Browse files
committed
IndVarSimplify: don't high-cost-expand in genLoopLimit
Guard against high-cost expansions in genLoopLimit, by checking IVLimit against SCEVExpander::isHighCostExpansion.
1 parent c1ac87b commit 6d428b6

File tree

5 files changed

+70
-87
lines changed

5 files changed

+70
-87
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 52 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,9 @@ class IndVarSimplify {
155155
bool rewriteFirstIterationLoopExitValues(Loop *L);
156156

157157
bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
158-
const SCEV *ExitCount,
159-
PHINode *IndVar, SCEVExpander &Rewriter);
158+
const SCEV *ExitCount, PHINode *IndVar,
159+
Instruction *IncVar, const SCEV *IVLimit,
160+
bool UsePostInc, SCEVExpander &Rewriter);
160161

161162
bool sinkUnusedInvariants(Loop *L);
162163

@@ -901,73 +902,38 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
901902
return BestPhi;
902903
}
903904

904-
/// Insert an IR expression which computes the value held by the IV IndVar
905-
/// (which must be an loop counter w/unit stride) after the backedge of loop L
906-
/// is taken ExitCount times.
907-
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
908-
const SCEV *ExitCount, bool UsePostInc, Loop *L,
909-
SCEVExpander &Rewriter, ScalarEvolution *SE) {
910-
assert(isLoopCounter(IndVar, L, SE));
911-
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
912-
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
913-
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
914-
905+
static const SCEV *getIVLimit(PHINode *IndVar, const SCEV *ExitCount,
906+
bool UsePostInc, ScalarEvolution *SE) {
915907
// For integer IVs, truncate the IV before computing the limit unless we
916908
// know apriori that the limit must be a constant when evaluated in the
917909
// bitwidth of the IV. We prefer (potentially) keeping a truncate of the
918910
// IV in the loop over a (potentially) expensive expansion of the widened
919911
// exit count add(zext(add)) expression.
912+
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
913+
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
920914
if (IndVar->getType()->isIntegerTy() &&
921915
SE->getTypeSizeInBits(AR->getType()) >
922-
SE->getTypeSizeInBits(ExitCount->getType())) {
916+
SE->getTypeSizeInBits(ExitCount->getType())) {
923917
const SCEV *IVInit = AR->getStart();
924918
if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
925919
AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
926920
}
927-
928-
const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
929-
const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
930-
assert(SE->isLoopInvariant(IVLimit, L) &&
931-
"Computed iteration count is not loop invariant!");
932-
return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
933-
ExitingBB->getTerminator());
921+
AR = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
922+
return AR->evaluateAtIteration(ExitCount, *SE);
934923
}
935924

936925
/// This method rewrites the exit condition of the loop to be a canonical !=
937926
/// comparison against the incremented loop induction variable. This pass is
938927
/// able to rewrite the exit tests of any loop where the SCEV analysis can
939928
/// determine a loop-invariant trip count of the loop, which is actually a much
940929
/// broader range than just linear tests.
941-
bool IndVarSimplify::
942-
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
943-
const SCEV *ExitCount,
944-
PHINode *IndVar, SCEVExpander &Rewriter) {
945-
assert(L->getLoopLatch() && "Loop no longer in simplified form?");
930+
bool IndVarSimplify::linearFunctionTestReplace(
931+
Loop *L, BasicBlock *ExitingBB, const SCEV *ExitCount, PHINode *IndVar,
932+
Instruction *IncVar, const SCEV *IVLimit, bool UsePostInc,
933+
SCEVExpander &Rewriter) {
946934
assert(isLoopCounter(IndVar, L, SE));
947-
Instruction * const IncVar =
948-
cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
949-
950-
// Initialize CmpIndVar to the preincremented IV.
951-
Value *CmpIndVar = IndVar;
952-
bool UsePostInc = false;
953-
954-
// If the exiting block is the same as the backedge block, we prefer to
955-
// compare against the post-incremented value, otherwise we must compare
956-
// against the preincremented value.
957-
if (ExitingBB == L->getLoopLatch()) {
958-
// For pointer IVs, we chose to not strip inbounds which requires us not
959-
// to add a potentially UB introducing use. We need to either a) show
960-
// the loop test we're modifying is already in post-inc form, or b) show
961-
// that adding a use must not introduce UB.
962-
bool SafeToPostInc =
963-
IndVar->getType()->isIntegerTy() ||
964-
isLoopExitTestBasedOn(IncVar, ExitingBB) ||
965-
mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
966-
if (SafeToPostInc) {
967-
UsePostInc = true;
968-
CmpIndVar = IncVar;
969-
}
970-
}
935+
936+
Value *CmpIndVar = UsePostInc ? IncVar : IndVar;
971937

972938
// It may be necessary to drop nowrap flags on the incrementing instruction
973939
// if either LFTR moves from a pre-inc check to a post-inc check (in which
@@ -989,8 +955,11 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
989955
BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
990956
}
991957

992-
Value *ExitCnt = genLoopLimit(
993-
IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
958+
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
959+
assert(SE->isLoopInvariant(IVLimit, L) &&
960+
"Computed iteration count is not loop invariant!");
961+
Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, IVLimit->getType(),
962+
ExitingBB->getTerminator());
994963
assert(ExitCnt->getType()->isPointerTy() ==
995964
IndVar->getType()->isPointerTy() &&
996965
"genLoopLimit missed a cast");
@@ -1950,8 +1919,6 @@ bool IndVarSimplify::run(Loop *L) {
19501919
// If we have a trip count expression, rewrite the loop's exit condition
19511920
// using it.
19521921
if (!DisableLFTR) {
1953-
BasicBlock *PreHeader = L->getLoopPreheader();
1954-
19551922
SmallVector<BasicBlock*, 16> ExitingBlocks;
19561923
L->getExitingBlocks(ExitingBlocks);
19571924
for (BasicBlock *ExitingBB : ExitingBlocks) {
@@ -1983,18 +1950,44 @@ bool IndVarSimplify::run(Loop *L) {
19831950
if (!IndVar)
19841951
continue;
19851952

1953+
assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1954+
1955+
Instruction *IncVar = cast<Instruction>(
1956+
IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1957+
1958+
// For pointer IVs, we chose to not strip inbounds which requires us not
1959+
// to add a potentially UB introducing use. We need to either a) show
1960+
// the loop test we're modifying is already in post-inc form, or b) show
1961+
// that adding a use must not introduce UB.
1962+
bool SafeToPostInc =
1963+
IndVar->getType()->isIntegerTy() ||
1964+
isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1965+
mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1966+
1967+
// If the exiting block is the same as the backedge block, we prefer to
1968+
// compare against the post-incremented value, otherwise we must compare
1969+
// against the preincremented value.
1970+
bool UsePostInc = ExitingBB == L->getLoopLatch() && SafeToPostInc;
1971+
1972+
// IVLimit is the expression that will get expanded later.
1973+
const SCEV *IVLimit = getIVLimit(IndVar, ExitCount, UsePostInc, SE);
1974+
19861975
// Avoid high cost expansions. Note: This heuristic is questionable in
19871976
// that our definition of "high cost" is not exactly principled.
1977+
// FIXME: ExitCount is not the expression actually being expanded, but we
1978+
// check it against high-cost expansions anyway to avoid regressions.
19881979
if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1989-
TTI, PreHeader->getTerminator()))
1980+
TTI, ExitingBB->getTerminator()) ||
1981+
Rewriter.isHighCostExpansion(IVLimit, L, SCEVCheapExpansionBudget,
1982+
TTI, ExitingBB->getTerminator()))
19901983
continue;
19911984

1992-
if (!Rewriter.isSafeToExpand(ExitCount))
1985+
if (!Rewriter.isSafeToExpand(IVLimit))
19931986
continue;
19941987

1995-
Changed |= linearFunctionTestReplace(L, ExitingBB,
1996-
ExitCount, IndVar,
1997-
Rewriter);
1988+
Changed |=
1989+
linearFunctionTestReplace(L, ExitingBB, ExitCount, IndVar, IncVar,
1990+
IVLimit, UsePostInc, Rewriter);
19981991
}
19991992
}
20001993
// Clear the rewriter cache, because values that are in the rewriter's cache

llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -158,17 +158,14 @@ define i8 @testnullptrint(ptr %buf, ptr %end) nounwind {
158158
; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
159159
; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
160160
; PTR64: preheader:
161-
; PTR64-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1
162-
; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
163-
; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
164-
; PTR64-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
165-
; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP4]]
166161
; PTR64-NEXT: br label [[LOOP:%.*]]
167162
; PTR64: loop:
168163
; PTR64-NEXT: [[P_01_US_US:%.*]] = phi ptr [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
164+
; PTR64-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ]
169165
; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[P_01_US_US]], i64 1
170166
; PTR64-NEXT: [[SNEXT:%.*]] = load i8, ptr [[GEP]], align 1
171-
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[GEP]], [[SCEVGEP]]
167+
; PTR64-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1
168+
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ult i32 [[IVNEXT]], [[CNT]]
172169
; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
173170
; PTR64: exit.loopexit:
174171
; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]

llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -41,17 +41,15 @@ end:
4141
define void @test_ptr(i32 %start) {
4242
; CHECK-LABEL: @test_ptr(
4343
; CHECK-NEXT: entry:
44-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[START:%.*]] to i3
45-
; CHECK-NEXT: [[TMP1:%.*]] = sub i3 -1, [[TMP0]]
46-
; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[TMP1]] to i64
47-
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
48-
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr @data, i64 [[TMP3]]
4944
; CHECK-NEXT: br label [[LOOP:%.*]]
5045
; CHECK: loop:
51-
; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY:%.*]] ], [ [[P_INC:%.*]], [[LOOP]] ]
46+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
47+
; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY]] ], [ [[P_INC:%.*]], [[LOOP]] ]
48+
; CHECK-NEXT: [[I_INC]] = add nuw i32 [[I]], 1
5249
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i8, ptr [[P]], i64 1
5350
; CHECK-NEXT: store volatile i8 0, ptr [[P_INC]], align 1
54-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq ptr [[P_INC]], [[UGLYGEP]]
51+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[I_INC]], 7
52+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[AND]], 0
5553
; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[LOOP]]
5654
; CHECK: end:
5755
; CHECK-NEXT: ret void

llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -11,29 +11,27 @@ define void @hoge(i64 %x, i64 %idx.start, ptr %ptr) {
1111
; CHECK-LABEL: @hoge(
1212
; CHECK-NEXT: entry:
1313
; CHECK-NEXT: [[N:%.*]] = sdiv exact i64 [[X:%.*]], 40
14-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[IDX_START:%.*]], 1
15-
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[N]]
1614
; CHECK-NEXT: br label [[HEADER:%.*]]
1715
; CHECK: header:
18-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ [[TMP1]], [[ENTRY:%.*]] ]
19-
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH]] ], [ [[IDX_START]], [[ENTRY]] ]
16+
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH:%.*]] ], [ [[IDX_START:%.*]], [[ENTRY:%.*]] ]
2017
; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[N]], [[IDX]]
2118
; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[INNER_PREHEADER:%.*]]
2219
; CHECK: inner.preheader:
2320
; CHECK-NEXT: br label [[INNER:%.*]]
2421
; CHECK: inner:
2522
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[INNER]] ], [ 0, [[INNER_PREHEADER]] ]
26-
; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1
23+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNER]] ], [ [[N]], [[INNER_PREHEADER]] ]
24+
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
25+
; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], 1
2726
; CHECK-NEXT: store i64 0, ptr [[PTR:%.*]], align 8
28-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[I_NEXT]], [[INDVARS_IV]]
27+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i64 [[J]], [[IDX]]
2928
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[INNER_EXIT:%.*]]
3029
; CHECK: inner_exit:
3130
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[I_NEXT]], [[INNER]] ]
3231
; CHECK-NEXT: [[INDVAR_USE:%.*]] = add i64 [[INDVAR]], 1
3332
; CHECK-NEXT: br label [[LATCH]]
3433
; CHECK: latch:
3534
; CHECK-NEXT: [[IDX_NEXT]] = add nsw i64 [[IDX]], -1
36-
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
3735
; CHECK-NEXT: br label [[HEADER]]
3836
; CHECK: end:
3937
; CHECK-NEXT: ret void

llvm/test/Transforms/PhaseOrdering/runtime-check-removal.ll

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,17 +14,14 @@ define void @test_remove_check_with_incrementing_integer_induction(i16 %start, i
1414
; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[LEN_N]], 0
1515
; CHECK-NEXT: [[OR_COND3:%.*]] = and i1 [[LEN_NEG_NOT]], [[C1]]
1616
; CHECK-NEXT: br i1 [[OR_COND3]], label [[LOOP_LATCH_PREHEADER:%.*]], label [[EXIT:%.*]]
17-
; CHECK: loop.latch.preheader:
18-
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[A]], -1
19-
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i16 [[LEN]], -1
20-
; CHECK-NEXT: [[UMIN:%.*]] = tail call i16 @llvm.umin.i16(i16 [[TMP0]], i16 [[TMP1]])
21-
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
2217
; CHECK: loop.latch:
23-
; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[IV_NEXT:%.*]], [[LOOP_LATCH]] ], [ 0, [[LOOP_LATCH_PREHEADER]] ]
18+
; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[IV_NEXT:%.*]], [[LOOP_LATCH_PREHEADER]] ], [ 0, [[ENTRY:%.*]] ]
2419
; CHECK-NEXT: tail call void @use(i16 [[IV2]])
2520
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV2]], 1
26-
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[IV2]], [[UMIN]]
27-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]]
21+
; CHECK-NEXT: [[C:%.*]] = icmp ne i16 [[IV_NEXT]], [[LEN]]
22+
; CHECK-NEXT: [[T_2:%.*]] = icmp ult i16 [[IV_NEXT]], [[A]]
23+
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C]], [[T_2]]
24+
; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_LATCH_PREHEADER]], label [[EXIT]]
2825
; CHECK: exit:
2926
; CHECK-NEXT: ret void
3027
;

0 commit comments

Comments
 (0)