Skip to content

Commit bb288de

Browse files
authored
[LoopPeel] Support last iteration peeling of min/max intrinsics (#143598)
This isn't terribly useful at the moment because of the step=1 restriction, but it should be functionally sound. This is mostly just making sure the code paths don't diverge as we make other changes.
1 parent 2ab9c35 commit bb288de

File tree

2 files changed

+42
-11
lines changed

2 files changed

+42
-11
lines changed

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,8 +545,11 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
545545
const SCEV *IterVal = AddRec->evaluateAtIteration(
546546
SE.getConstant(AddRec->getType(), NewPeelCount), SE);
547547
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
548-
Pred))
548+
Pred)) {
549+
if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
550+
DesiredPeelCountLast = 1;
549551
return;
552+
}
550553
DesiredPeelCount = NewPeelCount;
551554
};
552555

llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,27 @@ define i32 @smin_unit_step() {
4141
; CHECK-NEXT: [[ENTRY:.*]]:
4242
; CHECK-NEXT: br label %[[LOOP:.*]]
4343
; CHECK: [[LOOP]]:
44-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
44+
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
45+
; CHECK-NEXT: call void @foo(i32 1)
46+
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1
47+
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT1]], 1023
48+
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
49+
; CHECK: [[EXIT_PEEL_BEGIN]]:
50+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
51+
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
52+
; CHECK: [[LOOP_PEEL]]:
4553
; CHECK-NEXT: [[SUB:%.*]] = sub i32 1024, [[IV]]
4654
; CHECK-NEXT: [[MINMAX:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 1)
4755
; CHECK-NEXT: call void @foo(i32 [[MINMAX]])
48-
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
56+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1
4957
; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
50-
; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[EXIT:.*]]
58+
; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
59+
; CHECK: [[EXIT_PEEL_NEXT]]:
60+
; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
61+
; CHECK: [[LOOP_PEEL_NEXT]]:
62+
; CHECK-NEXT: br label %[[EXIT:.*]]
5163
; CHECK: [[EXIT]]:
52-
; CHECK-NEXT: [[MINMAX_LCSSA:%.*]] = phi i32 [ [[MINMAX]], %[[LOOP]] ]
53-
; CHECK-NEXT: ret i32 [[MINMAX_LCSSA]]
64+
; CHECK-NEXT: ret i32 [[MINMAX]]
5465
;
5566
entry:
5667
br label %loop
@@ -74,16 +85,28 @@ define i32 @smax_unit_step() {
7485
; CHECK-NEXT: [[ENTRY:.*]]:
7586
; CHECK-NEXT: br label %[[LOOP:.*]]
7687
; CHECK: [[LOOP]]:
77-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
88+
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
89+
; CHECK-NEXT: [[SUB1:%.*]] = sub nuw nsw i32 1024, [[IV1]]
90+
; CHECK-NEXT: call void @foo(i32 [[SUB1]])
91+
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1
92+
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT1]], 1023
93+
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP2:![0-9]+]]
94+
; CHECK: [[EXIT_PEEL_BEGIN]]:
95+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
96+
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
97+
; CHECK: [[LOOP_PEEL]]:
7898
; CHECK-NEXT: [[SUB:%.*]] = sub i32 1024, [[IV]]
7999
; CHECK-NEXT: [[MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB]], i32 1)
80100
; CHECK-NEXT: call void @foo(i32 [[MINMAX]])
81-
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
101+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1
82102
; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
83-
; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[EXIT:.*]]
103+
; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
104+
; CHECK: [[EXIT_PEEL_NEXT]]:
105+
; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
106+
; CHECK: [[LOOP_PEEL_NEXT]]:
107+
; CHECK-NEXT: br label %[[EXIT:.*]]
84108
; CHECK: [[EXIT]]:
85-
; CHECK-NEXT: [[MINMAX_LCSSA:%.*]] = phi i32 [ [[MINMAX]], %[[LOOP]] ]
86-
; CHECK-NEXT: ret i32 [[MINMAX_LCSSA]]
109+
; CHECK-NEXT: ret i32 [[MINMAX]]
87110
;
88111
entry:
89112
br label %loop
@@ -135,3 +158,8 @@ exit:
135158
ret i32 %minmax.lcssa
136159
}
137160

161+
;.
162+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
163+
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
164+
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
165+
;.

0 commit comments

Comments
 (0)