Skip to content

Commit 9306a03

Browse files
committed
[LoopPeel] Use loop guards when checking if last iter can be peeled.
Apply loop guards to the backedge-taken count (BTC) and to the right-hand side SCEV before checking whether the last iteration should be peeled off. This also adds an assert to make sure that applying the guards does not pessimize the result. I checked on a large test set and the assert did not trigger there; it is kept as an additional safeguard to catch potential cases where loop guards pessimize results. Peels ~15% more loops.
1 parent d204aa9 commit 9306a03

File tree

2 files changed

+41
-14
lines changed

2 files changed

+41
-14
lines changed

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -357,17 +357,14 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
357357
m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(&L)));
358358
}
359359

360-
/// Returns true if the last iteration can be peeled off and the condition (Pred
361-
/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition
362-
/// is known at the second-to-last.
360+
/// Returns true if the last iteration should be peeled off, i.e. the condition
361+
/// (Pred LeftAR, RightSCEV) is known at the last iteration and the inverse
362+
/// condition is known at the second-to-last.
363363
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
364364
const SCEVAddRecExpr *LeftAR,
365-
const SCEV *RightSCEV, ScalarEvolution &SE,
365+
const SCEV *RightSCEV, const SCEV *BTC,
366+
ScalarEvolution &SE,
366367
const TargetTransformInfo &TTI) {
367-
if (!canPeelLastIteration(L, SE))
368-
return false;
369-
370-
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
371368
SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
372369
if (!SE.isKnownNonZero(BTC) &&
373370
Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
@@ -377,7 +374,6 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
377374
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
378375
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
379376
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
380-
381377
return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
382378
RightSCEV) &&
383379
SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
@@ -484,8 +480,19 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
484480
const SCEV *Step = LeftAR->getStepRecurrence(SE);
485481
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
486482
Pred)) {
487-
if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
483+
if (!canPeelLastIteration(L, SE))
484+
return;
485+
486+
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
487+
auto Guards = ScalarEvolution::LoopGuards::collect(&L, SE);
488+
if (shouldPeelLastIteration(L, Pred, LeftAR,
489+
SE.applyLoopGuards(RightSCEV, Guards),
490+
SE.applyLoopGuards(BTC, Guards), SE, TTI))
488491
DesiredPeelCountLast = 1;
492+
else
493+
assert(!shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, BTC, SE,
494+
TTI) &&
495+
"loop guards pessimized result");
489496
return;
490497
}
491498

llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,33 @@ define void @peel_with_guard_known_nonnegative_1(i32 %n) {
1313
; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
1414
; CHECK-NEXT: [[N_1:%.*]] = add i32 [[N]], 1
1515
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N_1]] to i64
16+
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
17+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
18+
; CHECK-NEXT: br i1 [[TMP1]], label %[[PH_SPLIT:.*]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN:.*]]
19+
; CHECK: [[PH_SPLIT]]:
1620
; CHECK-NEXT: br label %[[LOOP:.*]]
1721
; CHECK: [[LOOP]]:
18-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
22+
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[PH_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
23+
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
24+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], 1
25+
; CHECK-NEXT: [[EC1:%.*]] = icmp eq i64 [[IV_NEXT1]], [[TMP2]]
26+
; CHECK-NEXT: br i1 [[EC1]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
27+
; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]]:
28+
; CHECK-NEXT: [[DOTPH:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
29+
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_PEEL_BEGIN]]
30+
; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN]]:
31+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[PH]] ], [ [[DOTPH]], %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ]
32+
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
33+
; CHECK: [[LOOP_PEEL]]:
1934
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], [[N_EXT]]
2035
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C]], i32 10, i32 20
21-
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
36+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1
2237
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
23-
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]]
38+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT_PEEL_NEXT:.*]], label %[[EXIT_LOOPEXIT_PEEL_NEXT]]
39+
; CHECK: [[EXIT_LOOPEXIT_PEEL_NEXT]]:
40+
; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
41+
; CHECK: [[LOOP_PEEL_NEXT]]:
42+
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
2443
; CHECK: [[EXIT_LOOPEXIT]]:
2544
; CHECK-NEXT: br label %[[EXIT]]
2645
; CHECK: [[EXIT]]:
@@ -137,7 +156,7 @@ define void @peel_with_guard2(i32 %n) {
137156
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
138157
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
139158
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
140-
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
159+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
141160
; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]]:
142161
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
143162
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_PEEL_BEGIN]]
@@ -188,4 +207,5 @@ exit:
188207
;.
189208
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
190209
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
210+
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
191211
;.

0 commit comments

Comments
 (0)