Skip to content

Commit dec15d9

Browse files
committed
[indvars] Use loop guards when canonicalizing exit conditions
This extends the logic in canonicalizeExitCondition to use loop guards to specialize the SCEV of the loop-invariant term before querying its range.
1 parent 1331728 commit dec15d9

File tree

2 files changed

+66
-2
lines changed

2 files changed

+66
-2
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,7 +1456,8 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
14561456
const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
14571457
auto FullCR = ConstantRange::getFull(InnerBitWidth);
14581458
FullCR = FullCR.zeroExtend(OuterBitWidth);
1459-
if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) {
1459+
auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1460+
if (FullCR.contains(RHSCR)) {
14601461
// We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
14611462
// replace the signed condition with the unsigned version.
14621463
ICmp->setPredicate(ICmp->getUnsignedPredicate());
@@ -1530,7 +1531,8 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
15301531
const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
15311532
auto FullCR = ConstantRange::getFull(InnerBitWidth);
15321533
FullCR = FullCR.zeroExtend(OuterBitWidth);
1533-
if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) {
1534+
auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1535+
if (FullCR.contains(RHSCR)) {
15341536
doRotateTransform();
15351537
Changed = true;
15361538
// Note, we are leaving SCEV in an unfortunately imprecise case here

llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,3 +989,65 @@ for.end: ; preds = %for.body, %entry
989989
ret i16 %iv2
990990
}
991991

992+
define void @slt_restricted_rhs(i16 %n.raw) mustprogress {
993+
; CHECK-LABEL: @slt_restricted_rhs(
994+
; CHECK-NEXT: entry:
995+
; CHECK-NEXT: [[N:%.*]] = and i16 [[N_RAW:%.*]], 255
996+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[N]] to i8
997+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
998+
; CHECK: for.body:
999+
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
1000+
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
1001+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
1002+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
1003+
; CHECK: for.end:
1004+
; CHECK-NEXT: ret void
1005+
;
1006+
entry:
1007+
%n = and i16 %n.raw, 255
1008+
br label %for.body
1009+
1010+
for.body: ; preds = %entry, %for.body
1011+
%iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ]
1012+
%iv.next = add i8 %iv, 1
1013+
%zext = zext i8 %iv.next to i16
1014+
%cmp = icmp slt i16 %zext, %n
1015+
br i1 %cmp, label %for.body, label %for.end
1016+
1017+
for.end: ; preds = %for.body
1018+
ret void
1019+
}
1020+
1021+
define void @slt_guarded_rhs(i16 %n) mustprogress {
1022+
; CHECK-LABEL: @slt_guarded_rhs(
1023+
; CHECK-NEXT: entry:
1024+
; CHECK-NEXT: [[IN_RANGE:%.*]] = icmp ult i16 [[N:%.*]], 256
1025+
; CHECK-NEXT: br i1 [[IN_RANGE]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
1026+
; CHECK: for.body.preheader:
1027+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[N]] to i8
1028+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1029+
; CHECK: for.body:
1030+
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
1031+
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
1032+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
1033+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
1034+
; CHECK: for.end.loopexit:
1035+
; CHECK-NEXT: br label [[FOR_END]]
1036+
; CHECK: for.end:
1037+
; CHECK-NEXT: ret void
1038+
;
1039+
entry:
1040+
%in_range = icmp ult i16 %n, 256
1041+
br i1 %in_range, label %for.body, label %for.end
1042+
1043+
for.body: ; preds = %entry, %for.body
1044+
%iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ]
1045+
%iv.next = add i8 %iv, 1
1046+
%zext = zext i8 %iv.next to i16
1047+
%cmp = icmp slt i16 %zext, %n
1048+
br i1 %cmp, label %for.body, label %for.end
1049+
1050+
for.end: ; preds = %for.body, %entry
1051+
ret void
1052+
}
1053+

0 commit comments

Comments
 (0)