Skip to content

Commit 13f16d1

Browse files
committed
IndVarSimplify: don't high-cost-expand in genLoopLimit
Guard against high-cost expansions in genLoopLimit, by checking IVLimit against SCEVExpander::isHighCostExpansion.
1 parent 16f7e96 commit 13f16d1

File tree

7 files changed

+98
-105
lines changed

7 files changed

+98
-105
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 49 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,9 @@ class IndVarSimplify {
154154
bool rewriteFirstIterationLoopExitValues(Loop *L);
155155

156156
bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
157-
const SCEV *ExitCount,
158-
PHINode *IndVar, SCEVExpander &Rewriter);
157+
const SCEV *ExitCount, PHINode *IndVar,
158+
Instruction *IncVar, const SCEV *IVLimit,
159+
bool UsePostInc, SCEVExpander &Rewriter);
159160

160161
bool sinkUnusedInvariants(Loop *L);
161162

@@ -907,73 +908,38 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
907908
return BestPhi;
908909
}
909910

910-
/// Insert an IR expression which computes the value held by the IV IndVar
911-
/// (which must be a loop counter with unit stride) after the backedge of loop L
912-
/// is taken ExitCount times.
913-
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
914-
const SCEV *ExitCount, bool UsePostInc, Loop *L,
915-
SCEVExpander &Rewriter, ScalarEvolution *SE) {
916-
assert(isLoopCounter(IndVar, L, SE));
917-
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
918-
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
919-
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
920-
911+
static const SCEV *getIVLimit(PHINode *IndVar, const SCEV *ExitCount,
912+
bool UsePostInc, ScalarEvolution *SE) {
921913
// For integer IVs, truncate the IV before computing the limit unless we
922914
// know a priori that the limit must be a constant when evaluated in the
923915
// bitwidth of the IV. We prefer (potentially) keeping a truncate of the
924916
// IV in the loop over a (potentially) expensive expansion of the widened
925917
// exit count add(zext(add)) expression.
918+
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
919+
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
926920
if (IndVar->getType()->isIntegerTy() &&
927921
SE->getTypeSizeInBits(AR->getType()) >
928-
SE->getTypeSizeInBits(ExitCount->getType())) {
922+
SE->getTypeSizeInBits(ExitCount->getType())) {
929923
const SCEV *IVInit = AR->getStart();
930924
if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
931925
AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
932926
}
933-
934-
const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
935-
const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
936-
assert(SE->isLoopInvariant(IVLimit, L) &&
937-
"Computed iteration count is not loop invariant!");
938-
return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
939-
ExitingBB->getTerminator());
927+
AR = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
928+
return AR->evaluateAtIteration(ExitCount, *SE);
940929
}
941930

942931
/// This method rewrites the exit condition of the loop to be a canonical !=
943932
/// comparison against the incremented loop induction variable. This pass is
944933
/// able to rewrite the exit tests of any loop where the SCEV analysis can
945934
/// determine a loop-invariant trip count of the loop, which is actually a much
946935
/// broader range than just linear tests.
947-
bool IndVarSimplify::
948-
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
949-
const SCEV *ExitCount,
950-
PHINode *IndVar, SCEVExpander &Rewriter) {
951-
assert(L->getLoopLatch() && "Loop no longer in simplified form?");
936+
bool IndVarSimplify::linearFunctionTestReplace(
937+
Loop *L, BasicBlock *ExitingBB, const SCEV *ExitCount, PHINode *IndVar,
938+
Instruction *IncVar, const SCEV *IVLimit, bool UsePostInc,
939+
SCEVExpander &Rewriter) {
952940
assert(isLoopCounter(IndVar, L, SE));
953-
Instruction * const IncVar =
954-
cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
955-
956-
// Initialize CmpIndVar to the preincremented IV.
957-
Value *CmpIndVar = IndVar;
958-
bool UsePostInc = false;
959-
960-
// If the exiting block is the same as the backedge block, we prefer to
961-
// compare against the post-incremented value, otherwise we must compare
962-
// against the preincremented value.
963-
if (ExitingBB == L->getLoopLatch()) {
964-
// For pointer IVs, we chose not to strip inbounds, which requires us not
965-
// to add a potentially UB-introducing use. We need to either a) show
966-
// the loop test we're modifying is already in post-inc form, or b) show
967-
// that adding a use must not introduce UB.
968-
bool SafeToPostInc =
969-
IndVar->getType()->isIntegerTy() ||
970-
isLoopExitTestBasedOn(IncVar, ExitingBB) ||
971-
mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
972-
if (SafeToPostInc) {
973-
UsePostInc = true;
974-
CmpIndVar = IncVar;
975-
}
976-
}
941+
942+
Value *CmpIndVar = UsePostInc ? IncVar : IndVar;
977943

978944
// It may be necessary to drop nowrap flags on the incrementing instruction
979945
// if either LFTR moves from a pre-inc check to a post-inc check (in which
@@ -995,8 +961,11 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
995961
BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
996962
}
997963

998-
Value *ExitCnt = genLoopLimit(
999-
IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
964+
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
965+
assert(SE->isLoopInvariant(IVLimit, L) &&
966+
"Computed iteration count is not loop invariant!");
967+
Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, IVLimit->getType(),
968+
ExitingBB->getTerminator());
1000969
assert(ExitCnt->getType()->isPointerTy() ==
1001970
IndVar->getType()->isPointerTy() &&
1002971
"genLoopLimit missed a cast");
@@ -1968,8 +1937,6 @@ bool IndVarSimplify::run(Loop *L) {
19681937
// If we have a trip count expression, rewrite the loop's exit condition
19691938
// using it.
19701939
if (!DisableLFTR) {
1971-
BasicBlock *PreHeader = L->getLoopPreheader();
1972-
19731940
SmallVector<BasicBlock*, 16> ExitingBlocks;
19741941
L->getExitingBlocks(ExitingBlocks);
19751942
for (BasicBlock *ExitingBB : ExitingBlocks) {
@@ -2001,18 +1968,40 @@ bool IndVarSimplify::run(Loop *L) {
20011968
if (!IndVar)
20021969
continue;
20031970

1971+
assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1972+
1973+
Instruction *IncVar = cast<Instruction>(
1974+
IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1975+
1976+
// For pointer IVs, we chose not to strip inbounds, which requires us not
1977+
// to add a potentially UB-introducing use. We need to either a) show
1978+
// the loop test we're modifying is already in post-inc form, or b) show
1979+
// that adding a use must not introduce UB.
1980+
bool SafeToPostInc =
1981+
IndVar->getType()->isIntegerTy() ||
1982+
isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1983+
mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1984+
1985+
// If the exiting block is the same as the backedge block, we prefer to
1986+
// compare against the post-incremented value, otherwise we must compare
1987+
// against the preincremented value.
1988+
bool UsePostInc = ExitingBB == L->getLoopLatch() && SafeToPostInc;
1989+
1990+
// IVLimit is the expression that will get expanded later.
1991+
const SCEV *IVLimit = getIVLimit(IndVar, ExitCount, UsePostInc, SE);
1992+
20041993
// Avoid high cost expansions. Note: This heuristic is questionable in
20051994
// that our definition of "high cost" is not exactly principled.
2006-
if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2007-
TTI, PreHeader->getTerminator()))
1995+
if (Rewriter.isHighCostExpansion(IVLimit, L, SCEVCheapExpansionBudget,
1996+
TTI, ExitingBB->getTerminator()))
20081997
continue;
20091998

2010-
if (!Rewriter.isSafeToExpand(ExitCount))
1999+
if (!Rewriter.isSafeToExpand(IVLimit))
20112000
continue;
20122001

2013-
Changed |= linearFunctionTestReplace(L, ExitingBB,
2014-
ExitCount, IndVar,
2015-
Rewriter);
2002+
Changed |=
2003+
linearFunctionTestReplace(L, ExitingBB, ExitCount, IndVar, IncVar,
2004+
IVLimit, UsePostInc, Rewriter);
20162005
}
20172006
}
20182007
// Clear the rewriter cache, because values that are in the rewriter's cache

llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,17 +158,14 @@ define i8 @testnullptrint(ptr %buf, ptr %end) nounwind {
158158
; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
159159
; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
160160
; PTR64: preheader:
161-
; PTR64-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1
162-
; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
163-
; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
164-
; PTR64-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
165-
; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP4]]
166161
; PTR64-NEXT: br label [[LOOP:%.*]]
167162
; PTR64: loop:
168163
; PTR64-NEXT: [[P_01_US_US:%.*]] = phi ptr [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
164+
; PTR64-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ]
169165
; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, ptr [[P_01_US_US]], i64 1
170166
; PTR64-NEXT: [[SNEXT:%.*]] = load i8, ptr [[GEP]], align 1
171-
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[GEP]], [[SCEVGEP]]
167+
; PTR64-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1
168+
; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ult i32 [[IVNEXT]], [[CNT]]
172169
; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
173170
; PTR64: exit.loopexit:
174171
; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]

llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -521,11 +521,13 @@ for.end: ; preds = %if.end, %entry
521521
define void @test3_neg(i64 %start) {
522522
; CHECK-LABEL: @test3_neg(
523523
; CHECK-NEXT: entry:
524+
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 -1)
525+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[SMAX]], 1
524526
; CHECK-NEXT: br label [[LOOP:%.*]]
525527
; CHECK: loop:
526-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
527-
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
528-
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1
528+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
529+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
530+
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
529531
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]]
530532
; CHECK: for.end:
531533
; CHECK-NEXT: ret void

llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,15 @@ end:
4141
define void @test_ptr(i32 %start) {
4242
; CHECK-LABEL: @test_ptr(
4343
; CHECK-NEXT: entry:
44-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[START:%.*]] to i3
45-
; CHECK-NEXT: [[TMP1:%.*]] = sub i3 -1, [[TMP0]]
46-
; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[TMP1]] to i64
47-
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
48-
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr @data, i64 [[TMP3]]
4944
; CHECK-NEXT: br label [[LOOP:%.*]]
5045
; CHECK: loop:
51-
; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY:%.*]] ], [ [[P_INC:%.*]], [[LOOP]] ]
46+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
47+
; CHECK-NEXT: [[P:%.*]] = phi ptr [ @data, [[ENTRY]] ], [ [[P_INC:%.*]], [[LOOP]] ]
48+
; CHECK-NEXT: [[I_INC]] = add nuw i32 [[I]], 1
5249
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i8, ptr [[P]], i64 1
5350
; CHECK-NEXT: store volatile i8 0, ptr [[P_INC]], align 1
54-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq ptr [[P_INC]], [[UGLYGEP]]
51+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[I_INC]], 7
52+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[AND]], 0
5553
; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[LOOP]]
5654
; CHECK: end:
5755
; CHECK-NEXT: ret void

llvm/test/Transforms/IndVarSimplify/post-inc-range.ll

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ define void @test_range_metadata(ptr %array_length_ptr, ptr %base,
115115
; CHECK-LABEL: @test_range_metadata(
116116
; CHECK-NEXT: for.body.lr.ph:
117117
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
118-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
118+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1
119+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
119120
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
120121
; CHECK: for.body:
121122
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -127,7 +128,8 @@ define void @test_range_metadata(ptr %array_length_ptr, ptr %base,
127128
; CHECK-NEXT: br label [[FOR_INC]]
128129
; CHECK: for.inc:
129130
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
130-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
131+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
132+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
131133
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
132134
; CHECK: for.end:
133135
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -221,10 +223,11 @@ define void @test_transitive_use(ptr %base, i32 %limit, i32 %start) {
221223
; CHECK-LABEL: @test_transitive_use(
222224
; CHECK-NEXT: for.body.lr.ph:
223225
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
224-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
225-
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[LIMIT]] to i64
226+
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[LIMIT:%.*]] to i64
226227
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START]], i32 64)
227228
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64
229+
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[START]], 1
230+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT]], i32 [[TMP5]])
228231
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
229232
; CHECK: for.body:
230233
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -236,13 +239,14 @@ define void @test_transitive_use(ptr %base, i32 %limit, i32 %start) {
236239
; CHECK-NEXT: br i1 [[MUL_WITHIN]], label [[GUARDED:%.*]], label [[CONTINUE_2:%.*]]
237240
; CHECK: guarded:
238241
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
239-
; CHECK-NEXT: [[RESULT:%.*]] = icmp slt i64 [[TMP4]], [[TMP1]]
242+
; CHECK-NEXT: [[RESULT:%.*]] = icmp slt i64 [[TMP4]], [[TMP2]]
240243
; CHECK-NEXT: br i1 [[RESULT]], label [[CONTINUE_2]], label [[FOR_END]]
241244
; CHECK: continue.2:
242245
; CHECK-NEXT: br label [[FOR_INC]]
243246
; CHECK: for.inc:
244247
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
245-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]]
248+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
249+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
246250
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
247251
; CHECK: for.end:
248252
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -293,14 +297,16 @@ define void @test_guard_one_bb(ptr %base, i32 %limit, i32 %start) {
293297
; CHECK-LABEL: @test_guard_one_bb(
294298
; CHECK-NEXT: for.body.lr.ph:
295299
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
296-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
300+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1
301+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
297302
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
298303
; CHECK: for.body:
299304
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
300305
; CHECK-NEXT: [[WITHIN_LIMITS:%.*]] = icmp ult i64 [[INDVARS_IV]], 64
301306
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ]
302307
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
303-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
308+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
309+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
304310
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
305311
; CHECK: for.end:
306312
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -333,7 +339,8 @@ define void @test_guard_in_the_same_bb(ptr %base, i32 %limit, i32 %start) {
333339
; CHECK-LABEL: @test_guard_in_the_same_bb(
334340
; CHECK-NEXT: for.body.lr.ph:
335341
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
336-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
342+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1
343+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
337344
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
338345
; CHECK: for.body:
339346
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -342,7 +349,8 @@ define void @test_guard_in_the_same_bb(ptr %base, i32 %limit, i32 %start) {
342349
; CHECK: for.inc:
343350
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ]
344351
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
345-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
352+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
353+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
346354
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
347355
; CHECK: for.end:
348356
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -378,7 +386,8 @@ define void @test_guard_in_idom(ptr %base, i32 %limit, i32 %start) {
378386
; CHECK-LABEL: @test_guard_in_idom(
379387
; CHECK-NEXT: for.body.lr.ph:
380388
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
381-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
389+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1
390+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
382391
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
383392
; CHECK: for.body:
384393
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -387,7 +396,8 @@ define void @test_guard_in_idom(ptr %base, i32 %limit, i32 %start) {
387396
; CHECK-NEXT: br label [[FOR_INC]]
388397
; CHECK: for.inc:
389398
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
390-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
399+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
400+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
391401
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
392402
; CHECK: for.end:
393403
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -423,7 +433,8 @@ define void @test_guard_merge_ranges(ptr %base, i32 %limit, i32 %start) {
423433
; CHECK-LABEL: @test_guard_merge_ranges(
424434
; CHECK-NEXT: for.body.lr.ph:
425435
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
426-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
436+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1
437+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
427438
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
428439
; CHECK: for.body:
429440
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -432,7 +443,8 @@ define void @test_guard_merge_ranges(ptr %base, i32 %limit, i32 %start) {
432443
; CHECK-NEXT: [[WITHIN_LIMITS_2:%.*]] = icmp ult i64 [[INDVARS_IV]], 2147483647
433444
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS_2]]) [ "deopt"() ]
434445
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
435-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
446+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
447+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
436448
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
437449
; CHECK: for.end:
438450
; CHECK-NEXT: br label [[EXIT:%.*]]

0 commit comments

Comments
 (0)