-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LoopPeel] Support last iteration peeling of min/max intrinsics #143598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This isn't terribly useful at the moment because of the step=1 restriction, but it should be functionally sound. The main purpose is to make sure the codepaths don't diverge as we make other changes.
@llvm/pr-subscribers-llvm-transforms Author: Philip Reames (preames) Changes: This isn't terribly useful at the moment because of the step=1 restriction, but it should be functionally sound. This is mostly just making sure the codepaths don't diverge as we make other changes. Full diff: https://github.com/llvm/llvm-project/pull/143598.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index f343962548259..27e70c5ddc0fc 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -545,8 +545,11 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
const SCEV *IterVal = AddRec->evaluateAtIteration(
SE.getConstant(AddRec->getType(), NewPeelCount), SE);
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
- Pred))
+ Pred)) {
+ if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
+ DesiredPeelCountLast = 1;
return;
+ }
DesiredPeelCount = NewPeelCount;
};
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll
index cd098e123b5f6..5e8540814fff2 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll
@@ -41,16 +41,27 @@ define i32 @smin_unit_step() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT1]], 1023
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[EXIT_PEEL_BEGIN]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
+; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 1024, [[IV]]
; CHECK-NEXT: [[MINMAX:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 1)
; CHECK-NEXT: call void @foo(i32 [[MINMAX]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; CHECK: [[EXIT_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
+; CHECK: [[LOOP_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[MINMAX_LCSSA:%.*]] = phi i32 [ [[MINMAX]], %[[LOOP]] ]
-; CHECK-NEXT: ret i32 [[MINMAX_LCSSA]]
+; CHECK-NEXT: ret i32 [[MINMAX]]
;
entry:
br label %loop
@@ -74,16 +85,28 @@ define i32 @smax_unit_step() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[SUB1:%.*]] = sub nuw nsw i32 1024, [[IV1]]
+; CHECK-NEXT: call void @foo(i32 [[SUB1]])
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT1]], 1023
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: [[EXIT_PEEL_BEGIN]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
+; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 1024, [[IV]]
; CHECK-NEXT: [[MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB]], i32 1)
; CHECK-NEXT: call void @foo(i32 [[MINMAX]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; CHECK: [[EXIT_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
+; CHECK: [[LOOP_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[MINMAX_LCSSA:%.*]] = phi i32 [ [[MINMAX]], %[[LOOP]] ]
-; CHECK-NEXT: ret i32 [[MINMAX_LCSSA]]
+; CHECK-NEXT: ret i32 [[MINMAX]]
;
entry:
br label %loop
@@ -135,3 +158,8 @@ exit:
ret i32 %minmax.lcssa
}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+;.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice, LGTM thanks!
[LoopPeel] Support last iteration peeling of min/max intrinsics (#143598) This isn't terribly useful at the moment because of the step=1 restriction, but it should be functionally sound. This is mostly just making sure the codepaths don't diverge as we make other changes.
This isn't terribly useful at the moment because of the step=1 restriction but it should be functionally sound. This is mostly just making sure the codepaths don't diverge as we make other changes.