
Commit a10a64e

[SCEV] Recommit "Use nw flag and symbolic iteration count to sharpen ranges of AddRecs", attempt 2
Compared to the previous attempt, the wrapping-range case is fixed and the proof methods are reduced to constant range checks to save compile time.

Differential Revision: https://reviews.llvm.org/D89381
1 parent 3692d20 commit a10a64e
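For context, a minimal sketch of the kind of loop this change targets (the function name @sketch_decrementing_iv and its body are illustrative, not the regression test): a 64-bit IV that starts at a zero-extended 32-bit value and counts down by 1, with a backedge-taken count that is only known symbolically. With the no-self-wrap flag on the AddRec, the range for the analogous recurrence in the updated no-wrap-symbolic-becount.ll test below tightens to [0,4294967296) instead of a wrapped range.

define void @sketch_decrementing_iv(i32 %start) {
entry:
  %ext = zext i32 %start to i64
  br label %loop

loop:
  ; %iv is the AddRec {(zext i32 %start to i64),+,-1}
  %iv = phi i64 [ %ext, %entry ], [ %iv.next, %backedge ]
  %done = icmp eq i64 %iv, 0
  br i1 %done, label %exit, label %backedge

backedge:
  ; the loop takes exactly (zext i32 %start to i64) backedges
  %iv.next = add nsw i64 %iv, -1
  br label %loop

exit:
  ret void
}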

File tree: 5 files changed, +89 -7 lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 7 additions & 0 deletions
@@ -1497,6 +1497,13 @@ class ScalarEvolution {
   ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
                                     const SCEV *MaxBECount, unsigned BitWidth);
 
+  /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
+  /// Start,+,\p Stop}<nw>.
+  ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
+                                                  const SCEV *MaxBECount,
+                                                  unsigned BitWidth,
+                                                  RangeSignHint SignHint);
+
   /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
   /// Stop} by "factoring out" a ternary expression from the add recurrence.
   /// Helper called by \c getRange.

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 75 additions & 0 deletions
@@ -5527,6 +5527,17 @@ ScalarEvolution::getRangeRef(const SCEV *S,
         ConservativeResult =
             ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
       }
+
+      // Now try symbolic BE count and more powerful methods.
+      MaxBECount = computeMaxBackedgeTakenCount(AddRec->getLoop());
+      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
+          AddRec->hasNoSelfWrap()) {
+        auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
+            AddRec, MaxBECount, BitWidth, SignHint);
+        ConservativeResult =
+            ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
+      }
     }
 
     return setRange(AddRec, SignHint, std::move(ConservativeResult));
@@ -5696,6 +5707,70 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
   return SR.intersectWith(UR, ConstantRange::Smallest);
 }
 
+ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
+    const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
+    ScalarEvolution::RangeSignHint SignHint) {
+  assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n");
+  assert(AddRec->hasNoSelfWrap() &&
+         "This only works for non-self-wrapping AddRecs!");
+  const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
+  const SCEV *Step = AddRec->getStepRecurrence(*this);
+  // Let's make sure that we can prove that we do not self-wrap during
+  // MaxBECount iterations. We need this because MaxBECount is a maximum
+  // iteration count estimate, and we might infer nw from some exit for which we
+  // do not know max exit count (or any other side reasoning).
+  // TODO: Turn into assert at some point.
+  MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
+  const SCEV *RangeWidth = getNegativeSCEV(getOne(AddRec->getType()));
+  const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
+  const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
+  if (!isKnownPredicate(ICmpInst::ICMP_ULE, MaxBECount, MaxItersWithoutWrap))
+    return ConstantRange::getFull(BitWidth);
+
+  ICmpInst::Predicate LEPred =
+      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+  ICmpInst::Predicate GEPred =
+      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+  const SCEV *Start = AddRec->getStart();
+  const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+  // We know that there is no self-wrap. Let's take Start and End values and
+  // look at all intermediate values V1, V2, ..., Vn that IndVar takes during
+  // the iteration. They either lie inside the range [Min(Start, End),
+  // Max(Start, End)] or outside it:
+  //
+  // Case 1: RangeMin ... Start V1 ... VN End ... RangeMax;
+  // Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax;
+  //
+  // No self wrap flag guarantees that the intermediate values cannot be BOTH
+  // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
+  // knowledge, let's try to prove that we are dealing with Case 1. It is so if
+  // Start <= End and step is positive, or Start >= End and step is negative.
+  ConstantRange StartRange =
+      IsSigned ? getSignedRange(Start) : getUnsignedRange(Start);
+  ConstantRange EndRange =
+      IsSigned ? getSignedRange(End) : getUnsignedRange(End);
+  ConstantRange RangeBetween = StartRange.unionWith(EndRange);
+  // If they already cover full iteration space, we will know nothing useful
+  // even if we prove what we want to prove.
+  if (RangeBetween.isFullSet())
+    return RangeBetween;
+  // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax).
+  bool IsWrappingRange =
+      IsSigned ? RangeBetween.getLower().sge(RangeBetween.getUpper())
+               : RangeBetween.getLower().uge(RangeBetween.getUpper());
+  if (IsWrappingRange)
+    return ConstantRange::getFull(BitWidth);
+
+  if (isKnownPositive(Step) &&
+      isKnownPredicateViaConstantRanges(LEPred, Start, End))
+    return RangeBetween;
+  else if (isKnownNegative(Step) &&
+           isKnownPredicateViaConstantRanges(GEPred, Start, End))
+    return RangeBetween;
+  return ConstantRange::getFull(BitWidth);
+}
+
 ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                     const SCEV *Step,
                                                     const SCEV *MaxBECount,
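To make the Case 1 reasoning above concrete, here is a by-hand walk-through on the decrementing recurrence from the ScalarEvolution test below (the values are worked out for illustration and are not quoted analysis output):

  AddRec     = {(zext i32 %start to i64),+,-1}<nw>, so Start = zext(%start), Step = -1
  MaxBECount = zext(%start)                         (symbolic backedge-taken count)
  StepAbs    = umin(-1, 1) = 1, so MaxItersWithoutWrap = (2^64 - 1) / 1
  MaxBECount u<= MaxItersWithoutWrap, so the AddRec cannot self-wrap within MaxBECount iterations
  End        = Start + Step * MaxBECount = 0
  Step is known negative and Start u>= End, so Case 1 applies
  RangeBetween = unsignedRange(Start) union unsignedRange(End)
               = [0,4294967296) union [0,1) = [0,4294967296)

which matches the sharpened U: [0,4294967296) range in the updated test below.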

llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll

Lines changed: 2 additions & 2 deletions
@@ -7,7 +7,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
 ; CHECK-NEXT: %0 = zext i32 %start to i64
 ; CHECK-NEXT: --> (zext i32 %start to i64) U: [0,4294967296) S: [0,4294967296)
 ; CHECK-NEXT: %indvars.iv = phi i64 [ %indvars.iv.next, %backedge ], [ %0, %entry ]
-; CHECK-NEXT: --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [-4294967295,4294967296) S: [-4294967295,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ]
 ; CHECK-NEXT: --> {%start,+,-1}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: %iv.next = add i32 %iv, -1
@@ -21,7 +21,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
 ; CHECK-NEXT: %stop = load i32, i32* %load.addr, align 4
 ; CHECK-NEXT: --> %stop U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT: %indvars.iv.next = add nsw i64 %indvars.iv, -1
-; CHECK-NEXT: --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-4294967296,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-1,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: Determining loop execution counts for: @test_01
 ; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
 ; CHECK-NEXT: exit count for loop: (zext i32 %start to i64)

llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll

Lines changed: 1 addition & 1 deletion
@@ -474,7 +474,7 @@ define void @test_10(i32 %n) {
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90
 ; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMIN]], -99
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll

Lines changed: 4 additions & 4 deletions
@@ -196,7 +196,7 @@ define void @promote_latch_condition_decrementing_loop_01(i32* %p, i32* %a) {
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
 ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
-; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
@@ -241,7 +241,7 @@ define void @promote_latch_condition_decrementing_loop_02(i32* %p, i32* %a) {
 ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
 
@@ -285,7 +285,7 @@ define void @promote_latch_condition_decrementing_loop_03(i32* %p, i32* %a) {
 ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
 
@@ -336,7 +336,7 @@ define void @promote_latch_condition_decrementing_loop_04(i32* %p, i32* %a, i1 %
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
 ; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
-; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
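The icmp changes in promote-iv-to-eliminate-casts.ll above appear to follow from the sharper ranges: once the promoted IV is known to be non-negative, a signed latch test against 1 is equivalent to an unsigned one (both reduce to a test for zero). A minimal sketch of that loop shape (the function @sketch_latch and its names are illustrative, not the test bodies):

define void @sketch_latch(i32* %a, i32 %len) {
preheader:
  %wide.len = zext i32 %len to i64
  br label %loop

loop:
  %indvars.iv = phi i64 [ %wide.len, %preheader ], [ %indvars.iv.next, %loop ]
  %el = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store atomic i32 0, i32* %el unordered, align 4
  ; %indvars.iv counts down from a zero-extended value and the loop exits at 0,
  ; so it is never negative and 'icmp slt 1' can be rewritten as 'icmp ult 1'.
  %loopcond = icmp slt i64 %indvars.iv, 1
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  br i1 %loopcond, label %exit, label %loop

exit:
  ret void
}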
