Commit 5d01697

[LAA] Be more careful when evaluating AddRecs at symbolic max BTC. (#128061)
Evaluating AR at the symbolic max BTC may wrap and create an expression that is less than the start of the AddRec due to wrapping (for example, consider MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd will get incremented by EltSize before returning, so this effectively sets ScEnd to unsigned max. Note that LAA separately checks that accesses cannot wrap (52ded67, #127543), so unsigned max represents an upper bound.

When there is a computable backedge-taken count, we are guaranteed to execute that number of iterations, and if any pointer would wrap it would be UB (or the access will never be executed, so it cannot alias). New tests from the previous discussion show a case where we wrap with a computable BTC, but it is UB because the pointer past the end of the object wraps (in `evaluate-at-backedge-taken-count-wrapping.ll`).

When we have only a maximum backedge-taken count, we instead try to use dereferenceability information to determine whether the pointer access must be in bounds for the maximum backedge-taken count.

PR: #128061
1 parent bf4afb0 commit 5d01697
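As a rough illustration of the wrapping scenario and the clamp described in the message above, here is a minimal standalone sketch using plain 64-bit unsigned arithmetic rather than actual SCEV expressions; the values of Start, Step, EltSize, and MaxBTC are made up for the example and are not taken from the patch.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Start = 0x1000;        // stands in for AR->getStart()
  uint64_t Step = 4;              // stride in bytes
  uint64_t EltSize = 4;           // size of the accessed element
  uint64_t MaxBTC = (uint64_t)-2; // symbolic max backedge-taken count

  // Naively evaluating the AddRec at MaxBTC wraps below Start.
  uint64_t NaiveEnd = Start + Step * MaxBTC;
  printf("NaiveEnd   = 0x%llx (wrapped below Start)\n",
         (unsigned long long)NaiveEnd);

  // Conservative clamp when wrapping cannot be ruled out:
  // ScEnd = -(EltSize + 1); adding EltSize afterwards yields unsigned max.
  uint64_t ScEnd = ~uint64_t(0) - EltSize; // == -(EltSize + 1)
  ScEnd += EltSize;                        // the final "+ EltSize" adjustment
  printf("ClampedEnd = 0x%llx (unsigned max)\n", (unsigned long long)ScEnd);
  return 0;
}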

File tree: 7 files changed, +333 / -53 lines

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 6 additions & 3 deletions
@@ -901,7 +901,10 @@ LLVM_ABI bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
 LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                                   ScalarEvolution &SE, bool CheckType = true);
 
-/// Calculate Start and End points of memory access.
+/// Calculate Start and End points of memory access using exact backedge taken
+/// count \p BTC if computable or maximum backedge taken count \p MaxBTC
+/// otherwise.
+///
 /// Let's assume A is the first access and B is a memory access on N-th loop
 /// iteration. Then B is calculated as:
 ///   B = A + Step*N .
@@ -915,8 +918,8 @@ LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
 LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
-    ScalarEvolution *SE,
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+    const SCEV *MaxBTC, ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, Type *>,
              std::pair<const SCEV *, const SCEV *>> *PointerBounds);
 
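The doc comment above states the runtime-check contract in terms of Low/High access bounds; the following is a tiny sketch (plain integers and hypothetical PtrBounds/noConflict names, not LAA's actual data structures) of the disjointness test it describes.

#include <cstdint>

struct PtrBounds {
  uint64_t Start; // first byte accessed (Low)
  uint64_t End;   // one past the last byte accessed (High)
};

// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
static bool noConflict(const PtrBounds &P1, const PtrBounds &P2) {
  return P2.Start >= P1.End || P1.Start >= P2.End;
}

int main() {
  PtrBounds P1{0x1000, 0x1010}, P2{0x1010, 0x1020};
  return noConflict(P1, P2) ? 0 : 1; // adjacent, non-overlapping ranges
}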

llvm/lib/Analysis/Loads.cpp

Lines changed: 6 additions & 1 deletion
@@ -319,17 +319,22 @@ bool llvm::isDereferenceableAndAlignedInLoop(
   const SCEV *MaxBECount =
       Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
                  : SE.getConstantMaxBackedgeTakenCount(L);
+  const SCEV *BECount = Predicates
+                            ? SE.getPredicatedBackedgeTakenCount(L, *Predicates)
+                            : SE.getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     return false;
 
   const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
-      L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
+      L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
   if (isa<SCEVCouldNotCompute>(AccessStart) ||
       isa<SCEVCouldNotCompute>(AccessEnd))
     return false;
 
   // Try to get the access size.
   const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
+  if (isa<SCEVCouldNotCompute>(PtrDiff))
+    return false;
   APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);
 
   Value *Base = nullptr;

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 123 additions & 14 deletions
@@ -188,9 +188,90 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
   Members.push_back(Index);
 }
 
+/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
+/// return nullptr. \p A and \p B must have the same type.
+static const SCEV *addSCEVOverflow(const SCEV *A, const SCEV *B,
+                                   ScalarEvolution &SE) {
+  if (!SE.willNotOverflow(Instruction::Add, false, A, B))
+    return nullptr;
+  return SE.getAddExpr(A, B);
+}
+
+/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
+/// return nullptr. \p A and \p B must have the same type.
+static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
+                                   ScalarEvolution &SE) {
+  if (!SE.willNotOverflow(Instruction::Mul, false, A, B))
+    return nullptr;
+  return SE.getMulExpr(A, B);
+}
+
+/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
+/// \p MaxBTC is guaranteed inbounds of the accessed object.
+static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+                                                 const SCEV *MaxBTC,
+                                                 const SCEV *EltSize,
+                                                 ScalarEvolution &SE,
+                                                 const DataLayout &DL) {
+  auto *PointerBase = SE.getPointerBase(AR->getStart());
+  auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
+  if (!StartPtr)
+    return false;
+  bool CheckForNonNull, CheckForFreed;
+  uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+      DL, CheckForNonNull, CheckForFreed);
+
+  if (CheckForNonNull || CheckForFreed)
+    return false;
+
+  const SCEV *Step = AR->getStepRecurrence(SE);
+  bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
+  if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
+    return false;
+
+  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+  Step = SE.getNoopOrSignExtend(Step, WiderTy);
+  MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
+
+  // For the computations below, make sure they don't unsigned wrap.
+  if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
+    return false;
+  const SCEV *StartOffset = SE.getNoopOrZeroExtend(
+      SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
+
+  const SCEV *OffsetAtLastIter =
+      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, false), SE);
+  if (!OffsetAtLastIter)
+    return false;
+
+  const SCEV *OffsetEndBytes = addSCEVOverflow(
+      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
+  if (!OffsetEndBytes)
+    return false;
+
+  if (IsKnownNonNegative) {
+    // For positive steps, check if
+    //  (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
+    // while making sure none of the computations unsigned wrap themselves.
+    const SCEV *EndBytes = addSCEVOverflow(StartOffset, OffsetEndBytes, SE);
+    if (!EndBytes)
+      return false;
+    return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
+                               SE.getConstant(WiderTy, DerefBytes));
+  }
+
+  // For negative steps check if
+  //  * StartOffset >= (MaxBTC * Step + EltSize)
+  //  * StartOffset <= DerefBytes.
+  assert(SE.isKnownNegative(Step) && "must be known negative");
+  return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
+         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
+                             SE.getConstant(WiderTy, DerefBytes));
+}
+
 std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
-    ScalarEvolution *SE,
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+    const SCEV *MaxBTC, ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, Type *>,
              std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
   std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -206,11 +287,37 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
+  auto &DL = Lp->getHeader()->getDataLayout();
+  Type *IdxTy = DL.getIndexType(PtrExpr->getType());
+  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   if (SE->isLoopInvariant(PtrExpr, Lp)) {
     ScStart = ScEnd = PtrExpr;
   } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
     ScStart = AR->getStart();
-    ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
+    if (!isa<SCEVCouldNotCompute>(BTC))
+      // Evaluating AR at an exact BTC is safe: LAA separately checks that
+      // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
+      // the loop either triggers UB when executing a memory access with a
+      // poison pointer or the wrapping/poisoned pointer is not used.
+      ScEnd = AR->evaluateAtIteration(BTC, *SE);
+    else {
+      // Evaluating AR at MaxBTC may wrap and create an expression that is less
+      // than the start of the AddRec due to wrapping (for example consider
+      // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
+      // will get incremented by EltSize before returning, so this effectively
+      // sets ScEnd to the maximum unsigned value for the type. Note that LAA
+      // separately checks that accesses cannot wrap, so unsigned max
+      // represents an upper bound.
+      if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
+                                               DL)) {
+        ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
+      } else {
+        ScEnd = SE->getAddExpr(
+            SE->getNegativeSCEV(EltSizeSCEV),
+            SE->getSCEV(ConstantExpr::getIntToPtr(
+                ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));
+      }
+    }
     const SCEV *Step = AR->getStepRecurrence(*SE);
 
     // For expressions with negative step, the upper bound is ScStart and the
@@ -232,9 +339,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
   assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
 
   // Add the size of the pointed element to ScEnd.
-  auto &DL = Lp->getHeader()->getDataLayout();
-  Type *IdxTy = DL.getIndexType(PtrExpr->getType());
-  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
   std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
@@ -250,9 +354,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                     unsigned DepSetId, unsigned ASId,
                                     PredicatedScalarEvolution &PSE,
                                     bool NeedsFreeze) {
-  const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
-  const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
-      Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
+  const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
+  const SCEV *BTC = PSE.getBackedgeTakenCount();
+  const auto &[ScStart, ScEnd] =
+      getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
+                              PSE.getSE(), &DC.getPointerBounds());
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1907,11 +2013,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   // required for correctness.
   if (SE.isLoopInvariant(Src, InnermostLoop) ||
       SE.isLoopInvariant(Sink, InnermostLoop)) {
-    const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
-    const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
-        InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
-    const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
-        InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
+    const SCEV *BTC = PSE.getBackedgeTakenCount();
+    const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
+    const auto &[SrcStart_, SrcEnd_] =
+        getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
+                                PSE.getSE(), &PointerBounds);
+    const auto &[SinkStart_, SinkEnd_] =
+        getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
+                                PSE.getSE(), &PointerBounds);
     if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
         !isa<SCEVCouldNotCompute>(SrcEnd_) &&
         !isa<SCEVCouldNotCompute>(SinkStart_) &&
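To make the dereferenceability reasoning in evaluatePtrAddRecAtMaxBTCWillNotWrap above easier to follow, here is a simplified sketch of the positive-step case using plain 64-bit arithmetic and the GCC/Clang overflow builtins; lastIterInBounds is a hypothetical name, not the SCEV-based implementation itself. The access on the last iteration stays in bounds only if StartOffset + MaxBTC * Step + EltSize <= DerefBytes and none of those computations wrap.

#include <cstdint>

// True if the access on the last iteration provably stays within the first
// DerefBytes dereferenceable bytes of the underlying object.
static bool lastIterInBounds(uint64_t StartOffset, uint64_t MaxBTC,
                             uint64_t Step, uint64_t EltSize,
                             uint64_t DerefBytes) {
  uint64_t OffsetAtLastIter, OffsetEndBytes, EndBytes;
  // MaxBTC * Step; give up if the multiply wraps (mirrors mulSCEVOverflow).
  if (__builtin_mul_overflow(MaxBTC, Step, &OffsetAtLastIter))
    return false;
  // ... + EltSize; give up if the add wraps (mirrors addSCEVOverflow).
  if (__builtin_add_overflow(OffsetAtLastIter, EltSize, &OffsetEndBytes))
    return false;
  // StartOffset + (MaxBTC * Step + EltSize), again without wrapping.
  if (__builtin_add_overflow(StartOffset, OffsetEndBytes, &EndBytes))
    return false;
  return EndBytes <= DerefBytes;
}

int main() {
  // E.g. max backedge-taken count 500 over i32 accesses starting at the base
  // of an object with 4000 dereferenceable bytes: 500 * 4 + 4 = 2004 <= 4000.
  return lastIterInBounds(/*StartOffset=*/0, /*MaxBTC=*/500, /*Step=*/4,
                          /*EltSize=*/4, /*DerefBytes=*/4000) ? 0 : 1;
}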

llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll

Lines changed: 13 additions & 12 deletions
@@ -72,10 +72,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_501_iterations_kno
 ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT: Grouped accesses:
 ; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (2004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
 ; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (2004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -131,10 +131,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_not
 ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT: Grouped accesses:
 ; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (2000 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
 ; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (2000 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -247,10 +247,10 @@ define i32 @all_exits_dominate_latch_countable_exits_at_most_1001_iterations_kno
 ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT: Grouped accesses:
 ; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
 ; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -305,10 +305,10 @@ define i32 @all_exits_dominate_latch_countable_exits_at_most_1000_iterations_not
 ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT: Grouped accesses:
 ; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4000 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
 ; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4000 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -350,6 +350,7 @@ e.2:
   ret i32 2
 }
 
+
 define i32 @not_all_exits_dominate_latch(ptr %A, ptr %B) {
 ; CHECK-LABEL: 'not_all_exits_dominate_latch'
 ; CHECK-NEXT: loop.header:
@@ -407,10 +408,10 @@ define i32 @b3_does_not_dominate_latch_known_deref(ptr dereferenceable(4000) %A,
 ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT: Grouped accesses:
 ; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
 ; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -462,10 +463,10 @@ define i32 @b3_does_not_dominate_latch_not_known_deref(ptr %A, ptr %B) {
 ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT: Grouped accesses:
 ; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
 ; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
 ; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
