Skip to content

Commit c0d9bf2

Browse files
committed
[indvars] Allow rotation (narrowing) of exit test when discovering trip count
This relaxes the one-use requirement on the rotation transform specifically for the case where we know we're zexting an IV of the loop. This allows us to discover trip count information in SCEV, which seems worth a single extra loop invariant truncate. Honestly, I'd prefer if SCEV could just compute the trip count directly (e.g. D109457), but this unblocks practical benefit.
1 parent 2125eb3 commit c0d9bf2

File tree

2 files changed

+70
-4
lines changed

2 files changed

+70
-4
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,17 +1493,22 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
14931493
}
14941494
assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
14951495

1496-
if (!LHS->hasOneUse())
1497-
// Can't rotate without increasing instruction count
1498-
continue;
1499-
15001496
// Match (icmp unsigned-cond zext, RHS)
15011497
// TODO: Extend to handle corresponding sext/signed-cmp case
15021498
// TODO: Extend to other invertible functions
15031499
Value *LHSOp = nullptr;
15041500
if (!match(LHS, m_ZExt(m_Value(LHSOp))))
15051501
continue;
15061502

1503+
// In general, we only rotate if we can do so without increasing the number
1504+
// of instructions. The exception is when we have a zext(add-rec). The
1505+
// reason for allowing this exception is that we know we need to get rid
1506+
// of the zext for SCEV to be able to compute a trip count for said loops;
1507+
// we consider the new trip count valuable enough to increase instruction
1508+
// count by one.
1509+
if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1510+
continue;
1511+
15071512
// Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
15081513
// replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
15091514
// when zext is loop varying and RHS is loop invariant. This converts

llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,3 +928,64 @@ for.body: ; preds = %entry, %for.body
928928
for.end: ; preds = %for.body, %entry
929929
ret void
930930
}
931+
932+
define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
933+
; CHECK-LABEL: @ult_multiuse_profit(
934+
; CHECK-NEXT: entry:
935+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8
936+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
937+
; CHECK: for.body:
938+
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
939+
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
940+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16
941+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
942+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
943+
; CHECK: for.end:
944+
; CHECK-NEXT: [[ZEXT_LCSSA:%.*]] = phi i16 [ [[ZEXT]], [[FOR_BODY]] ]
945+
; CHECK-NEXT: ret i16 [[ZEXT_LCSSA]]
946+
;
947+
entry:
948+
br label %for.body
949+
950+
for.body: ; preds = %entry, %for.body
951+
%iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ]
952+
%iv.next = add i8 %iv, 1
953+
%zext = zext i8 %iv.next to i16
954+
%cmp = icmp ult i16 %zext, 254
955+
br i1 %cmp, label %for.body, label %for.end
956+
957+
for.end: ; preds = %for.body
958+
ret i16 %zext
959+
}
960+
961+
define i16 @ult_multiuse_profit2(i16 %n.raw, i8 %start) mustprogress {
962+
; CHECK-LABEL: @ult_multiuse_profit2(
963+
; CHECK-NEXT: entry:
964+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8
965+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
966+
; CHECK: for.body:
967+
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
968+
; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[ZEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
969+
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
970+
; CHECK-NEXT: [[ZEXT]] = zext i8 [[IV_NEXT]] to i16
971+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
972+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
973+
; CHECK: for.end:
974+
; CHECK-NEXT: [[IV2_LCSSA:%.*]] = phi i16 [ [[IV2]], [[FOR_BODY]] ]
975+
; CHECK-NEXT: ret i16 [[IV2_LCSSA]]
976+
;
977+
entry:
978+
br label %for.body
979+
980+
for.body: ; preds = %entry, %for.body
981+
%iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ]
982+
%iv2 = phi i16 [%zext, %for.body], [0, %entry]
983+
%iv.next = add i8 %iv, 1
984+
%zext = zext i8 %iv.next to i16
985+
%cmp = icmp ult i16 %zext, 254
986+
br i1 %cmp, label %for.body, label %for.end
987+
988+
for.end: ; preds = %for.body
989+
ret i16 %iv2
990+
}
991+

0 commit comments

Comments
 (0)