Skip to content

Commit 32b72c3

Browse files
committed
Recommit "[SCEV] Use nw flag and symbolic iteration count to sharpen ranges of AddRecs"
It was reverted because of negative compile time impact. In this version, less powerful proof methods are used (non-recursive reasoning only), and the scope is limited to constant End values to avoid an explosion of complex proofs. Differential Revision: https://reviews.llvm.org/D89381
1 parent fbd62fe commit 32b72c3

File tree

5 files changed

+86
-7
lines changed

5 files changed

+86
-7
lines changed

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,13 @@ class ScalarEvolution {
14891489
ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
14901490
const SCEV *MaxBECount, unsigned BitWidth);
14911491

1492+
/// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
1493+
/// Start,+,\p Stop}<nw>.
1494+
ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
1495+
const SCEV *MaxBECount,
1496+
unsigned BitWidth,
1497+
RangeSignHint SignHint);
1498+
14921499
/// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
14931500
/// Stop} by "factoring out" a ternary expression from the add recurrence.
14941501
/// Helper called by \c getRange.

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5509,6 +5509,17 @@ ScalarEvolution::getRangeRef(const SCEV *S,
55095509
ConservativeResult =
55105510
ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
55115511
}
5512+
5513+
// Now try symbolic BE count and more powerful methods.
5514+
MaxBECount = computeMaxBackedgeTakenCount(AddRec->getLoop());
5515+
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
5516+
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
5517+
AddRec->hasNoSelfWrap()) {
5518+
auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
5519+
AddRec, MaxBECount, BitWidth, SignHint);
5520+
ConservativeResult =
5521+
ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
5522+
}
55125523
}
55135524

55145525
return setRange(AddRec, SignHint, std::move(ConservativeResult));
@@ -5678,6 +5689,67 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
56785689
return SR.intersectWith(UR, ConstantRange::Smallest);
56795690
}
56805691

5692+
ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
5693+
const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
5694+
ScalarEvolution::RangeSignHint SignHint) {
5695+
assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n");
5696+
assert(AddRec->hasNoSelfWrap() &&
5697+
"This only works for non-self-wrapping AddRecs!");
5698+
const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
5699+
const SCEV *Step = AddRec->getStepRecurrence(*this);
5700+
// Let's make sure that we can prove that we do not self-wrap during
5701+
// MaxBECount iterations. We need this because MaxBECount is a maximum
5702+
// iteration count estimate, and we might infer nw from some exit for which we
5703+
// do not know max exit count (or any other side reasoning).
5704+
// TODO: Turn into assert at some point.
5705+
MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
5706+
const SCEV *RangeWidth = getNegativeSCEV(getOne(AddRec->getType()));
5707+
const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
5708+
const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
5709+
if (!isKnownPredicate(ICmpInst::ICMP_ULE, MaxBECount, MaxItersWithoutWrap))
5710+
return ConstantRange::getFull(BitWidth);
5711+
5712+
ICmpInst::Predicate LEPred =
5713+
IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
5714+
ICmpInst::Predicate GEPred =
5715+
IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
5716+
const SCEV *Start = AddRec->getStart();
5717+
const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
5718+
// We could handle non-constant End, but it harms compile time a lot.
5719+
if (!isa<SCEVConstant>(End))
5720+
return ConstantRange::getFull(BitWidth);
5721+
5722+
// We know that there is no self-wrap. Let's take Start and End values and
5723+
// look at all intermediate values V1, V2, ..., Vn that IndVar takes during
5724+
// the iteration. They either lie inside the range [Min(Start, End),
5725+
// Max(Start, End)] or outside it:
5726+
//
5727+
// Case 1: RangeMin ... Start V1 ... VN End ... RangeMax;
5728+
// Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax;
5729+
//
5730+
// No self wrap flag guarantees that the intermediate values cannot be BOTH
5731+
// outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
5732+
// knowledge, let's try to prove that we are dealing with Case 1. It is so if
5733+
// Start <= End and step is positive, or Start >= End and step is negative.
5734+
ConstantRange StartRange =
5735+
IsSigned ? getSignedRange(Start) : getUnsignedRange(Start);
5736+
ConstantRange EndRange =
5737+
IsSigned ? getSignedRange(End) : getUnsignedRange(End);
5738+
ConstantRange RangeBetween = StartRange.unionWith(EndRange);
5739+
// If they already cover full iteration space, we will know nothing useful
5740+
// even if we prove what we want to prove.
5741+
if (RangeBetween.isFullSet())
5742+
return RangeBetween;
5743+
5744+
if (isKnownPositive(Step) &&
5745+
isKnownViaNonRecursiveReasoning(LEPred, Start, End))
5746+
return RangeBetween;
5747+
else if (isKnownNegative(Step) &&
5748+
isKnownViaNonRecursiveReasoning(GEPred, Start, End))
5749+
return RangeBetween;
5750+
return ConstantRange::getFull(BitWidth);
5751+
}
5752+
56815753
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
56825754
const SCEV *Step,
56835755
const SCEV *MaxBECount,

llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
77
; CHECK-NEXT: %0 = zext i32 %start to i64
88
; CHECK-NEXT: --> (zext i32 %start to i64) U: [0,4294967296) S: [0,4294967296)
99
; CHECK-NEXT: %indvars.iv = phi i64 [ %indvars.iv.next, %backedge ], [ %0, %entry ]
10-
; CHECK-NEXT: --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [-4294967295,4294967296) S: [-4294967295,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
10+
; CHECK-NEXT: --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1111
; CHECK-NEXT: %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ]
1212
; CHECK-NEXT: --> {%start,+,-1}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
1313
; CHECK-NEXT: %iv.next = add i32 %iv, -1
@@ -21,7 +21,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
2121
; CHECK-NEXT: %stop = load i32, i32* %load.addr, align 4
2222
; CHECK-NEXT: --> %stop U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
2323
; CHECK-NEXT: %indvars.iv.next = add nsw i64 %indvars.iv, -1
24-
; CHECK-NEXT: --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-4294967296,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
24+
; CHECK-NEXT: --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-1,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
2525
; CHECK-NEXT: Determining loop execution counts for: @test_01
2626
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
2727
; CHECK-NEXT: exit count for loop: (zext i32 %start to i64)

llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ define void @test_10(i32 %n) {
474474
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
475475
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90
476476
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90
477-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMIN]], -99
477+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99
478478
; CHECK-NEXT: br label [[LOOP:%.*]]
479479
; CHECK: loop:
480480
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ define void @promote_latch_condition_decrementing_loop_01(i32* %p, i32* %a) {
196196
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
197197
; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
198198
; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
199-
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
199+
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
200200
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
201201
; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
202202
;
@@ -241,7 +241,7 @@ define void @promote_latch_condition_decrementing_loop_02(i32* %p, i32* %a) {
241241
; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
242242
; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
243243
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
244-
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
244+
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
245245
; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
246246
;
247247

@@ -285,7 +285,7 @@ define void @promote_latch_condition_decrementing_loop_03(i32* %p, i32* %a) {
285285
; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
286286
; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
287287
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
288-
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
288+
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
289289
; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
290290
;
291291

@@ -336,7 +336,7 @@ define void @promote_latch_condition_decrementing_loop_04(i32* %p, i32* %a, i1 %
336336
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
337337
; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
338338
; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
339-
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
339+
; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
340340
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
341341
; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
342342
;

0 commit comments

Comments
 (0)