Skip to content

[LoopPeel] Avoid unprofitable last iteration peeling with equalities #143588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions llvm/lib/Transforms/Utils/LoopPeel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,23 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);

return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
RightSCEV) &&
SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
CmpPredicate InvPred = ICmpInst::getInversePredicate(Pred);
if (!SE.isKnownPredicate(InvPred, ValAtLastIter, RightSCEV) ||
!SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV))
return false;

// For a monotonic predicate, we've found the single transition point and
// don't need to check further.
if (!ICmpInst::isEquality(Pred))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use SE.getMonotonicPredicateType additionally here, to handle the case where the AddRec may wrap, which could make something like i < x not motontonic?

Copy link
Collaborator Author

@preames preames Jun 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused. In order to get here, we have to have identified a predicate which is either a nssw equality, or a monotonic predicate. (Along the min/max intrinsic, we check the same conditions, but using different names, etc..). What's the point of adding the additional monotonic check here?

return true;

// For an equality, the condition can toggle twice. For the peeling to be
// profitable, we need to know that this is the first toggle. The two
// toggles are separate by one iteration. If the BTC isn't at least two
// this will overflow, but that's okay correctness wise.
const SCEV *ValAtPriorIter = LeftAR->evaluateAtIteration(
SE.getMinusSCEV(BTC, SE.getConstant(BTC->getType(), 2)), SE);
return SE.isKnownPredicate(Pred, ValAtPriorIter, RightSCEV);
}

// Return the number of iterations to peel off from the beginning and end of the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,39 +176,18 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi_cmp_not_invar2(i32 %n) {
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -2
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
; CHECK: [[ENTRY_SPLIT]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[IV]], [[SUB]]
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2
; CHECK-NEXT: call void @foo(i32 [[SEL]])
; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV]], 1
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP2]]
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ne i32 [[TMP3]], [[SUB]]
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = select i1 [[C_PEEL]], i32 1, i32 2
; CHECK-NEXT: call void @foo(i32 [[SEL_LCSSA]])
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[TMP3]], 1
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[TMP3]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
; CHECK: [[EXIT_PEEL_NEXT]]:
; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
; CHECK: [[LOOP_PEEL_NEXT]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[SEL_LCSSA]]
; CHECK-NEXT: [[SEL_LCSSA1:%.*]] = phi i32 [ [[SEL_LCSSA]], %[[LOOP]] ]
; CHECK-NEXT: ret i32 [[SEL_LCSSA1]]
;
entry:
%sub = add i32 %n, -2
Expand Down Expand Up @@ -245,7 +224,7 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi_cmp_not_invar3(i32 %n) {
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP1]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]]
Expand Down Expand Up @@ -363,7 +342,7 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) {
; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
;
entry:
%sub = add i32 %n, -1
Expand Down Expand Up @@ -392,5 +371,4 @@ inner.latch:
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
;.
Loading