@@ -188,9 +188,90 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
188
188
Members.push_back (Index);
189
189
}
190
190
191
+ // / Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
192
+ // / return nullptr. \p A and \p B must have the same type.
193
+ static const SCEV *addSCEVOverflow (const SCEV *A, const SCEV *B,
194
+ ScalarEvolution &SE) {
195
+ if (!SE.willNotOverflow (Instruction::Add, false , A, B))
196
+ return nullptr ;
197
+ return SE.getAddExpr (A, B);
198
+ }
199
+
200
+ // / Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
201
+ // / return nullptr. \p A and \p B must have the same type.
202
+ static const SCEV *mulSCEVOverflow (const SCEV *A, const SCEV *B,
203
+ ScalarEvolution &SE) {
204
+ if (!SE.willNotOverflow (Instruction::Mul, false , A, B))
205
+ return nullptr ;
206
+ return SE.getMulExpr (A, B);
207
+ }
208
+
209
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
/// \p MaxBTC is guaranteed inbounds of the accessed object.
///
/// \p AR      the pointer AddRec to evaluate.
/// \p MaxBTC  the maximum backedge-taken count to evaluate \p AR at.
/// \p EltSize size in bytes of a single access.
/// Returns false conservatively whenever any required fact cannot be proven.
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
                                                 const SCEV *MaxBTC,
                                                 const SCEV *EltSize,
                                                 ScalarEvolution &SE,
                                                 const DataLayout &DL) {
  // The reasoning below needs a concrete underlying object: the pointer base
  // of the AddRec's start must be a plain SCEVUnknown (an IR value).
  auto *PointerBase = SE.getPointerBase(AR->getStart());
  auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
  if (!StartPtr)
    return false;
  bool CheckForNonNull, CheckForFreed;
  uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
      DL, CheckForNonNull, CheckForFreed);

  // Bail out if dereferenceability is conditional on the pointer being
  // non-null or on the object not having been freed; we only reason about
  // unconditional dereferenceability here.
  if (CheckForNonNull || CheckForFreed)
    return false;

  // The step's sign determines which direction the access range grows; give
  // up when the sign is unknown.
  const SCEV *Step = AR->getStepRecurrence(SE);
  bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
  if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
    return false;

  // Work in a type wide enough for both operands: the step is sign-extended
  // (it may be negative), the trip count is zero-extended (it is unsigned).
  Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
  Step = SE.getNoopOrSignExtend(Step, WiderTy);
  MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);

  // For the computations below, make sure they don't unsigned wrap.
  // Start must not be below the base, so Start - StartPtr is non-negative.
  if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
    return false;
  const SCEV *StartOffset = SE.getNoopOrZeroExtend(
      SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);

  // Total distance advanced after MaxBTC iterations: MaxBTC * |Step|. Must
  // itself be computed without unsigned wrap.
  const SCEV *OffsetAtLastIter =
      mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, false), SE);
  if (!OffsetAtLastIter)
    return false;

  // Byte offset just past the last access: MaxBTC * |Step| + EltSize, again
  // checked for unsigned wrap.
  const SCEV *OffsetEndBytes = addSCEVOverflow(
      OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
  if (!OffsetEndBytes)
    return false;

  if (IsKnownNonNegative) {
    // For positive steps, check if
    //  (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
    // while making sure none of the computations unsigned wrap themselves.
    const SCEV *EndBytes = addSCEVOverflow(StartOffset, OffsetEndBytes, SE);
    if (!EndBytes)
      return false;
    return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
                               SE.getConstant(WiderTy, DerefBytes));
  }

  // For negative steps check if
  //  * StartOffset >= (MaxBTC * Step + EltSize)
  //  * StartOffset <= DerefBytes.
  assert(SE.isKnownNegative(Step) && "must be known negative");
  return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
         SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
                             SE.getConstant(WiderTy, DerefBytes));
}
271
+
191
272
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess (
192
- const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount ,
193
- ScalarEvolution *SE,
273
+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC ,
274
+ const SCEV *MaxBTC, ScalarEvolution *SE,
194
275
DenseMap<std::pair<const SCEV *, Type *>,
195
276
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
196
277
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -206,11 +287,37 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
206
287
const SCEV *ScStart;
207
288
const SCEV *ScEnd;
208
289
290
+ auto &DL = Lp->getHeader ()->getDataLayout ();
291
+ Type *IdxTy = DL.getIndexType (PtrExpr->getType ());
292
+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr (IdxTy, AccessTy);
209
293
if (SE->isLoopInvariant (PtrExpr, Lp)) {
210
294
ScStart = ScEnd = PtrExpr;
211
295
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
212
296
ScStart = AR->getStart ();
213
- ScEnd = AR->evaluateAtIteration (MaxBECount, *SE);
297
+ if (!isa<SCEVCouldNotCompute>(BTC))
298
+ // Evaluating AR at an exact BTC is safe: LAA separately checks that
299
+ // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
300
+ // the loop either triggers UB when executing a memory access with a
301
+ // poison pointer or the wrapping/poisoned pointer is not used.
302
+ ScEnd = AR->evaluateAtIteration (BTC, *SE);
303
+ else {
304
+ // Evaluating AR at MaxBTC may wrap and create an expression that is less
305
+ // than the start of the AddRec due to wrapping (for example consider
306
+ // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
307
+ // will get incremented by EltSize before returning, so this effectively
308
+ // sets ScEnd to the maximum unsigned value for the type. Note that LAA
309
+ // separately checks that accesses cannot wrap, so unsigned max
310
+ // represents an upper bound.
311
+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap (AR, MaxBTC, EltSizeSCEV, *SE,
312
+ DL)) {
313
+ ScEnd = AR->evaluateAtIteration (MaxBTC, *SE);
314
+ } else {
315
+ ScEnd = SE->getAddExpr (
316
+ SE->getNegativeSCEV (EltSizeSCEV),
317
+ SE->getSCEV (ConstantExpr::getIntToPtr (
318
+ ConstantInt::get (EltSizeSCEV->getType (), -1 ), AR->getType ())));
319
+ }
320
+ }
214
321
const SCEV *Step = AR->getStepRecurrence (*SE);
215
322
216
323
// For expressions with negative step, the upper bound is ScStart and the
@@ -232,9 +339,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
232
339
assert (SE->isLoopInvariant (ScEnd, Lp) && " ScEnd needs to be invariant" );
233
340
234
341
// Add the size of the pointed element to ScEnd.
235
- auto &DL = Lp->getHeader ()->getDataLayout ();
236
- Type *IdxTy = DL.getIndexType (PtrExpr->getType ());
237
- const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr (IdxTy, AccessTy);
238
342
ScEnd = SE->getAddExpr (ScEnd, EltSizeSCEV);
239
343
240
344
std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
@@ -250,9 +354,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
250
354
unsigned DepSetId, unsigned ASId,
251
355
PredicatedScalarEvolution &PSE,
252
356
bool NeedsFreeze) {
253
- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount ();
254
- const auto &[ScStart, ScEnd] = getStartAndEndForAccess (
255
- Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE (), &DC.getPointerBounds ());
357
+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
358
+ const SCEV *BTC = PSE.getBackedgeTakenCount ();
359
+ const auto &[ScStart, ScEnd] =
360
+ getStartAndEndForAccess (Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
361
+ PSE.getSE (), &DC.getPointerBounds ());
256
362
assert (!isa<SCEVCouldNotCompute>(ScStart) &&
257
363
!isa<SCEVCouldNotCompute>(ScEnd) &&
258
364
" must be able to compute both start and end expressions" );
@@ -1907,11 +2013,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
1907
2013
// required for correctness.
1908
2014
if (SE.isLoopInvariant (Src, InnermostLoop) ||
1909
2015
SE.isLoopInvariant (Sink, InnermostLoop)) {
1910
- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount ();
1911
- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess (
1912
- InnermostLoop, Src, ATy, MaxBECount, PSE.getSE (), &PointerBounds);
1913
- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess (
1914
- InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE (), &PointerBounds);
2016
+ const SCEV *BTC = PSE.getBackedgeTakenCount ();
2017
+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount ();
2018
+ const auto &[SrcStart_, SrcEnd_] =
2019
+ getStartAndEndForAccess (InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
2020
+ PSE.getSE (), &PointerBounds);
2021
+ const auto &[SinkStart_, SinkEnd_] =
2022
+ getStartAndEndForAccess (InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
2023
+ PSE.getSE (), &PointerBounds);
1915
2024
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
1916
2025
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
1917
2026
!isa<SCEVCouldNotCompute>(SinkStart_) &&
0 commit comments