Skip to content

Commit d24a0e8

Browse files
committed
[SCEV] Use constant range of RHS to prove NUW on narrow IV in trip count logic
The basic idea here is that, given a zero-extended narrow IV, we can prove the inner (narrow) IV to be NUW if we can prove that there is a value the inner IV must take before overflowing, and that taking this value must exit the loop. Differential Revision: https://reviews.llvm.org/D109457
1 parent 3c7960c commit d24a0e8

File tree

3 files changed

+191
-21
lines changed

3 files changed

+191
-21
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11792,9 +11792,34 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
1179211792

1179311793
SmallVector<const SCEV*> Operands{AR->operands()};
1179411794
Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
11795-
11796-
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
1179711795
}
11796+
11797+
auto canProveNUW = [&]() {
11798+
if (!isLoopInvariant(RHS, L))
11799+
return false;
11800+
11801+
if (!isKnownNonZero(AR->getStepRecurrence(*this)))
11802+
// We need the sequence defined by AR to strictly increase in the
11803+
// unsigned integer domain for the logic below to hold.
11804+
return false;
11805+
11806+
const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType());
11807+
const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType());
11808+
// If RHS <=u Limit, then there must exist a value V in the sequence
11809+
// defined by AR (e.g. {Start,+,Step}) such that V >=u RHS, and
11810+
// V <=u UINT_MAX. Thus, we must exit the loop before unsigned
11811+
// overflow occurs. This limit also implies that a signed comparison
11812+
// (in the wide bitwidth) is equivalent to an unsigned comparison as
11813+
// the high bits on both sides must be zero.
11814+
APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this));
11815+
APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1);
11816+
Limit = Limit.zext(OuterBitWidth);
11817+
return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit);
11818+
};
11819+
if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW())
11820+
Flags = setFlags(Flags, SCEV::FlagNUW);
11821+
11822+
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
1179811823
if (AR->hasNoUnsignedWrap()) {
1179911824
// Emulate what getZeroExtendExpr would have done during construction
1180011825
// if we'd been able to infer the fact just above at that time.

llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll

Lines changed: 147 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,11 @@ for.end: ; preds = %for.body, %entry
279279
define void @rhs_narrow_range(i16 %n.raw) {
280280
; CHECK-LABEL: 'rhs_narrow_range'
281281
; CHECK-NEXT: Determining loop execution counts for: @rhs_narrow_range
282-
; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
283-
; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count.
282+
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16))<nuw><nsw>))<nsw>
283+
; CHECK-NEXT: Loop %for.body: max backedge-taken count is 253
284284
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16))<nuw><nsw>))<nsw>
285285
; CHECK-NEXT: Predicates:
286-
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
286+
; CHECK: Loop %for.body: Trip multiple is 1
287287
;
288288
entry:
289289
%n = and i16 %n.raw, 254
@@ -301,6 +301,150 @@ for.end: ; preds = %for.body, %entry
301301
ret void
302302
}
303303

304+
define void @ugt_constant_rhs(i16 %n.raw, i8 %start) mustprogress {
305+
;
306+
; CHECK-LABEL: 'ugt_constant_rhs'
307+
; CHECK-NEXT: Determining loop execution counts for: @ugt_constant_rhs
308+
; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
309+
; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count.
310+
; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count.
311+
;
312+
entry:
313+
br label %for.body
314+
315+
for.body: ; preds = %entry, %for.body
316+
%iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ]
317+
%iv.next = add i8 %iv, 1
318+
%zext = zext i8 %iv.next to i16
319+
%cmp = icmp ugt i16 %zext, 254
320+
br i1 %cmp, label %for.body, label %for.end
321+
322+
for.end: ; preds = %for.body, %entry
323+
ret void
324+
}
325+
326+
define void @ult_constant_rhs(i16 %n.raw, i8 %start) {
327+
;
328+
; CHECK-LABEL: 'ult_constant_rhs'
329+
; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs
330+
; CHECK-NEXT: Loop %for.body: backedge-taken count is (255 + (-1 * (zext i8 (1 + %start) to i16))<nsw>)<nsw>
331+
; CHECK-NEXT: Loop %for.body: max backedge-taken count is 255
332+
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (255 + (-1 * (zext i8 (1 + %start) to i16))<nsw>)<nsw>
333+
; CHECK-NEXT: Predicates:
334+
; CHECK: Loop %for.body: Trip multiple is 1
335+
;
336+
entry:
337+
br label %for.body
338+
339+
for.body: ; preds = %entry, %for.body
340+
%iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ]
341+
%iv.next = add i8 %iv, 1
342+
%zext = zext i8 %iv.next to i16
343+
%cmp = icmp ult i16 %zext, 255
344+
br i1 %cmp, label %for.body, label %for.end
345+
346+
for.end: ; preds = %for.body, %entry
347+
ret void
348+
}
349+
350+
define void @ult_constant_rhs_stride2(i16 %n.raw, i8 %start) {
351+
;
352+
; CHECK-LABEL: 'ult_constant_rhs_stride2'
353+
; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs_stride2
354+
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((1 + (-1 * (zext i8 (2 + %start) to i16))<nsw> + (254 umax (zext i8 (2 + %start) to i16))) /u 2)
355+
; CHECK-NEXT: Loop %for.body: max backedge-taken count is 127
356+
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((1 + (-1 * (zext i8 (2 + %start) to i16))<nsw> + (254 umax (zext i8 (2 + %start) to i16))) /u 2)
357+
; CHECK-NEXT: Predicates:
358+
; CHECK: Loop %for.body: Trip multiple is 1
359+
;
360+
entry:
361+
br label %for.body
362+
363+
for.body: ; preds = %entry, %for.body
364+
%iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ]
365+
%iv.next = add i8 %iv, 2
366+
%zext = zext i8 %iv.next to i16
367+
%cmp = icmp ult i16 %zext, 254
368+
br i1 %cmp, label %for.body, label %for.end
369+
370+
for.end: ; preds = %for.body, %entry
371+
ret void
372+
}
373+
374+
define void @ult_constant_rhs_stride2_neg(i16 %n.raw, i8 %start) {
375+
;
376+
; CHECK-LABEL: 'ult_constant_rhs_stride2_neg'
377+
; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs_stride2_neg
378+
; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
379+
; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count.
380+
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))<nsw>)<nsw> /u 2)
381+
; CHECK-NEXT: Predicates:
382+
; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: <nusw>
383+
;
384+
entry:
385+
br label %for.body
386+
387+
for.body: ; preds = %entry, %for.body
388+
%iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ]
389+
%iv.next = add i8 %iv, 2
390+
%zext = zext i8 %iv.next to i16
391+
%cmp = icmp ult i16 %zext, 255
392+
br i1 %cmp, label %for.body, label %for.end
393+
394+
for.end: ; preds = %for.body, %entry
395+
ret void
396+
}
397+
398+
399+
define void @ult_restricted_rhs(i16 %n.raw) {
400+
; CHECK-LABEL: 'ult_restricted_rhs'
401+
; CHECK-NEXT: Determining loop execution counts for: @ult_restricted_rhs
402+
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (zext i8 (trunc i16 %n.raw to i8) to i16)))<nsw>
403+
; CHECK-NEXT: Loop %for.body: max backedge-taken count is 254
404+
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (zext i8 (trunc i16 %n.raw to i8) to i16)))<nsw>
405+
; CHECK-NEXT: Predicates:
406+
; CHECK: Loop %for.body: Trip multiple is 1
407+
;
408+
entry:
409+
%n = and i16 %n.raw, 255
410+
br label %for.body
411+
412+
for.body: ; preds = %entry, %for.body
413+
%iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ]
414+
%iv.next = add i8 %iv, 1
415+
%zext = zext i8 %iv.next to i16
416+
%cmp = icmp ult i16 %zext, %n
417+
br i1 %cmp, label %for.body, label %for.end
418+
419+
for.end: ; preds = %for.body, %entry
420+
ret void
421+
}
422+
423+
define void @ult_guarded_rhs(i16 %n) {;
424+
; CHECK-LABEL: 'ult_guarded_rhs'
425+
; CHECK-NEXT: Determining loop execution counts for: @ult_guarded_rhs
426+
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax %n))
427+
; CHECK-NEXT: Loop %for.body: max backedge-taken count is -2
428+
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax %n))
429+
; CHECK-NEXT: Predicates:
430+
; CHECK: Loop %for.body: Trip multiple is 1
431+
;
432+
entry:
433+
%in_range = icmp ult i16 %n, 256
434+
br i1 %in_range, label %for.body, label %for.end
435+
436+
for.body: ; preds = %entry, %for.body
437+
%iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ]
438+
%iv.next = add i8 %iv, 1
439+
%zext = zext i8 %iv.next to i16
440+
%cmp = icmp ult i16 %zext, %n
441+
br i1 %cmp, label %for.body, label %for.end
442+
443+
for.end: ; preds = %for.body, %entry
444+
ret void
445+
}
446+
447+
304448

305449
declare void @llvm.assume(i1)
306450

llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,13 @@ define void @slt_non_constant_rhs_no_mustprogress(i16 %n.raw) {
129129
; CHECK-LABEL: @slt_non_constant_rhs_no_mustprogress(
130130
; CHECK-NEXT: entry:
131131
; CHECK-NEXT: [[N:%.*]] = and i16 [[N_RAW:%.*]], 255
132-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[N]] to i8
132+
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 1)
133133
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
134134
; CHECK: for.body:
135-
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
136-
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
137-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
138-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
135+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i16 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
136+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i16 [[INDVARS_IV]], 1
137+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i16 [[INDVARS_IV_NEXT]], [[SMAX]]
138+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
139139
; CHECK: for.end:
140140
; CHECK-NEXT: ret void
141141
;
@@ -932,17 +932,18 @@ for.end: ; preds = %for.body, %entry
932932
define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
933933
; CHECK-LABEL: @ult_multiuse_profit(
934934
; CHECK-NEXT: entry:
935-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8
935+
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[START:%.*]], 1
936+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i16
937+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i16 254 to i8
936938
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
937939
; CHECK: for.body:
938-
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
940+
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START]], [[ENTRY:%.*]] ]
939941
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
940-
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16
941-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
942+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP2]]
942943
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
943944
; CHECK: for.end:
944-
; CHECK-NEXT: [[ZEXT_LCSSA:%.*]] = phi i16 [ [[ZEXT]], [[FOR_BODY]] ]
945-
; CHECK-NEXT: ret i16 [[ZEXT_LCSSA]]
945+
; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254)
946+
; CHECK-NEXT: ret i16 [[UMAX]]
946947
;
947948
entry:
948949
br label %for.body
@@ -993,13 +994,13 @@ define void @slt_restricted_rhs(i16 %n.raw) mustprogress {
993994
; CHECK-LABEL: @slt_restricted_rhs(
994995
; CHECK-NEXT: entry:
995996
; CHECK-NEXT: [[N:%.*]] = and i16 [[N_RAW:%.*]], 255
996-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[N]] to i8
997+
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 1)
997998
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
998999
; CHECK: for.body:
999-
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
1000-
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
1001-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
1002-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
1000+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i16 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
1001+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i16 [[INDVARS_IV]], 1
1002+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i16 [[INDVARS_IV_NEXT]], [[SMAX]]
1003+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
10031004
; CHECK: for.end:
10041005
; CHECK-NEXT: ret void
10051006
;

0 commit comments

Comments
 (0)