Skip to content

Commit f0944f4

Browse files
preames and nikic authored
[SCEV] Prove no-self-wrap from negative power of two step (#101416)
We have existing code which reasons about a step evenly dividing the iteration space in a finite loop with a single exit implying no-self-wrap. The sign of the step doesn't affect this. --------- Co-authored-by: Nikita Popov <[email protected]>
1 parent e89129e commit f0944f4

File tree

3 files changed

+28
-21
lines changed

3 files changed

+28
-21
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,8 +1028,10 @@ class ScalarEvolution {
10281028
/// Test if the given expression is known to be non-zero.
10291029
bool isKnownNonZero(const SCEV *S);
10301030

1031-
/// Test if the given expression is known to be a power of 2.
1032-
bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false);
1031+
/// Test if the given expression is known to be a power of 2. OrNegative
1032+
/// allows matching negative powers of 2, and OrZero allows matching 0.
1033+
bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false,
1034+
bool OrNegative = false);
10331035

10341036
/// Splits SCEV expression \p S into two SCEVs. One of them is obtained from
10351037
/// \p S by substitution of all AddRec sub-expression related to loop \p L

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9158,7 +9158,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
91589158
InnerLHS = ZExt->getOperand();
91599159
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(InnerLHS);
91609160
AR && !AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() &&
9161-
isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true)) {
9161+
isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true,
9162+
/*OrNegative=*/true)) {
91629163
auto Flags = AR->getNoWrapFlags();
91639164
Flags = setFlags(Flags, SCEV::FlagNW);
91649165
SmallVector<const SCEV *> Operands{AR->operands()};
@@ -10843,10 +10844,13 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
1084310844
return getUnsignedRangeMin(S) != 0;
1084410845
}
1084510846

10846-
bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero) {
10847-
auto NonRecursive = [this](const SCEV *S) {
10847+
bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero,
10848+
bool OrNegative) {
10849+
auto NonRecursive = [this, OrNegative](const SCEV *S) {
1084810850
if (auto *C = dyn_cast<SCEVConstant>(S))
10849-
return C->getAPInt().isPowerOf2();
10851+
return C->getAPInt().isPowerOf2() ||
10852+
(OrNegative && C->getAPInt().isNegatedPowerOf2());
10853+
1085010854
// The vscale_range indicates vscale is a power-of-two.
1085110855
return isa<SCEVVScale>(S) && F.hasFnAttribute(Attribute::VScaleRange);
1085210856
};
@@ -12790,7 +12794,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
1279012794
if (!isLoopInvariant(RHS, L))
1279112795
return false;
1279212796

12793-
if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true))
12797+
if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true,
12798+
/*OrNegative*/ true))
1279412799
return false;
1279512800

1279612801
if (!ControlsOnlyExit || !loopHasNoAbnormalExits(L))

llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -444,8 +444,6 @@ for.end: ; preds = %for.body, %entry
444444

445445
; The next two cases check to see if we can infer the flags on the IV
446446
; of a countdown loop using vscale strides.
447-
; TODO: We should be able to because vscale is a power of two and these
448-
; are finite loops by assumption.
449447

450448
define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
451449
; CHECK-LABEL: 'vscale_countdown_ne'
@@ -455,15 +453,16 @@ define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_r
455453
; CHECK-NEXT: %start = sub i32 %n, %vscale
456454
; CHECK-NEXT: --> ((-1 * vscale)<nsw> + %n) U: full-set S: full-set
457455
; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
458-
; CHECK-NEXT: --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
456+
; CHECK-NEXT: --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
459457
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
460-
; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
458+
; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable }
461459
; CHECK-NEXT: %sub = sub i32 %iv, %vscale
462-
; CHECK-NEXT: --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
460+
; CHECK-NEXT: --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
463461
; CHECK-NEXT: Determining loop execution counts for: @vscale_countdown_ne
464-
; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
465-
; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
466-
; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
462+
; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
463+
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647
464+
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
465+
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
467466
;
468467
entry:
469468
%vscale = call i32 @llvm.vscale.i32()
@@ -495,15 +494,16 @@ define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale
495494
; CHECK-NEXT: %start = sub i32 %n, %VF
496495
; CHECK-NEXT: --> ((-4 * vscale)<nsw> + %n) U: full-set S: full-set
497496
; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
498-
; CHECK-NEXT: --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
497+
; CHECK-NEXT: --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
499498
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
500-
; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
499+
; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable }
501500
; CHECK-NEXT: %sub = sub i32 %iv, %VF
502-
; CHECK-NEXT: --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
501+
; CHECK-NEXT: --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
503502
; CHECK-NEXT: Determining loop execution counts for: @vscalex4_countdown_ne
504-
; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
505-
; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
506-
; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
503+
; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
504+
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870911
505+
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
506+
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
507507
;
508508
entry:
509509
%vscale = call i32 @llvm.vscale.i32()

0 commit comments

Comments
 (0)