
Commit 5b9cc46

[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop
Currently, when we encounter a negative step in the induction variable, isDereferenceableAndAlignedInLoop bails out because the element size compares signed-greater-than the step.

This patch adds support for negative steps in cases where we detect that the start address for the load is of the form base + offset. In this case the address decrements on each iteration, so we need to calculate the access size differently. I have done this by calling getStartAndEndForAccess from LoopAccessAnalysis.cpp.

The changed test in LoopVectorize/X86/load-deref-pred.ll now passes because previously we were calculating the total access size incorrectly, whereas now it is 412 bytes and fits perfectly into the alloca.
1 parent 4cec0ba commit 5b9cc46
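For illustration, the kind of loop this change targets might look like the following sketch (hypothetical code, not taken from the patch or its tests): the load address starts at base + offset and decrements by one element per iteration, i.e. an AddRec with a negative constant step.

// Hypothetical example: a reverse loop over a buffer of known extent. The
// address of src[i] is the SCEV {src + 4*(n-1),+,-4}: base plus constant
// offset, with step -4. The old EltSize.sgt(Step) check rejected this
// outright; the new code bounds the accessed range [src, src + 4*n) instead.
void reverse_copy(int *dst, const int *src, int n) {
  for (int i = n - 1; i >= 0; --i) // induction variable counts down
    dst[i] = src[i];
}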

File tree

5 files changed: +183 -261 lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 9 additions & 0 deletions
@@ -853,6 +853,15 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
 bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                          ScalarEvolution &SE, bool CheckType = true);
 
+/// For a given Loop \p Lp and pointer \p PtrExpr return a pair of SCEV values
+/// representing the maximum range of addresses accessed in the loop, i.e.
+/// [min,max).
+std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
+    ScalarEvolution *SE,
+    DenseMap<std::pair<const SCEV *, Type *>,
+             std::pair<const SCEV *, const SCEV *>> *PointerBounds);
+
 class LoopAccessInfoManager {
   /// The cache.
   DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
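As a hedged usage sketch of the newly exported helper (only the signature comes from the declaration above; the surrounding variables L, Ptr, AccessTy and SE are assumed to be in scope):

// Sketch: bound every address a strided pointer may touch in loop L.
// Passing nullptr for PointerBounds skips the memoization cache, which is
// exactly how the Loads.cpp change below calls it.
const SCEV *PtrExpr = SE.getSCEV(Ptr);
const SCEV *MaxBECount = SE.getSymbolicMaxBackedgeTakenCount(L);
auto [Start, End] = llvm::getStartAndEndForAccess(
    L, PtrExpr, AccessTy, MaxBECount, &SE, /*PointerBounds=*/nullptr);
if (!isa<SCEVCouldNotCompute>(Start) && !isa<SCEVCouldNotCompute>(End)) {
  // [Start, End) covers the full range of addresses accessed in the loop.
}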

llvm/lib/Analysis/Loads.cpp

Lines changed: 62 additions & 60 deletions
@@ -13,6 +13,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -275,84 +276,85 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
 bool llvm::isDereferenceableAndAlignedInLoop(
     LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
-  auto &DL = LI->getDataLayout();
-  Value *Ptr = LI->getPointerOperand();
-
-  APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
-                DL.getTypeStoreSize(LI->getType()).getFixedValue());
-  const Align Alignment = LI->getAlign();
+  const SCEV *Ptr = SE.getSCEV(LI->getPointerOperand());
+  auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ptr);
 
-  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
-
-  // If given a uniform (i.e. non-varying) address, see if we can prove the
-  // access is safe within the loop w/o needing predication.
-  if (L->isLoopInvariant(Ptr))
-    return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
-                                              HeaderFirstNonPHI, AC, &DT);
-
-  // Otherwise, check to see if we have a repeating access pattern where we can
-  // prove that all accesses are well aligned and dereferenceable.
-  auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+  // Check to see if we have a repeating access pattern and it's possible
+  // to prove all accesses are well aligned.
   if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
     return false;
+
   auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
   if (!Step)
     return false;
 
-  auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
-  if (!TC)
+  // For the moment, restrict ourselves to the case where the access size is a
+  // multiple of the requested alignment and the base is aligned.
+  // TODO: generalize if a case found which warrants
+  const Align Alignment = LI->getAlign();
+  auto &DL = LI->getDataLayout();
+  APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+                DL.getTypeStoreSize(LI->getType()).getFixedValue());
+  if (EltSize.urem(Alignment.value()) != 0)
     return false;
 
   // TODO: Handle overlapping accesses.
-  // We should be computing AccessSize as (TC - 1) * Step + EltSize.
-  if (EltSize.sgt(Step->getAPInt()))
+  if (EltSize.ugt(Step->getAPInt().abs()))
     return false;
 
-  // Compute the total access size for access patterns with unit stride and
-  // patterns with gaps. For patterns with unit stride, Step and EltSize are the
-  // same.
-  // For patterns with gaps (i.e. non unit stride), we are
-  // accessing EltSize bytes at every Step.
-  APInt AccessSize = TC * Step->getAPInt();
+  const SCEV *MaxBECount =
+      SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates);
+  if (isa<SCEVCouldNotCompute>(MaxBECount))
+    return false;
 
-  assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
-         "implied by addrec definition");
-  Value *Base = nullptr;
-  if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
-    Base = StartS->getValue();
-  } else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
-    // Handle (NewBase + offset) as start value.
-    const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
-    const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
-    if (StartS->getNumOperands() == 2 && Offset && NewBase) {
-      // The following code below assumes the offset is unsigned, but GEP
-      // offsets are treated as signed so we can end up with a signed value
-      // here too. For example, suppose the initial PHI value is (i8 255),
-      // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
-      if (Offset->getAPInt().isNegative())
-        return false;
+  const auto &[AccessStart, AccessEnd] =
+      getStartAndEndForAccess(L, Ptr, LI->getType(), MaxBECount, &SE, nullptr);
+  if (isa<SCEVCouldNotCompute>(AccessStart) ||
+      isa<SCEVCouldNotCompute>(AccessEnd))
+    return false;
 
-      // For the moment, restrict ourselves to the case where the offset is a
-      // multiple of the requested alignment and the base is aligned.
-      // TODO: generalize if a case found which warrants
-      if (Offset->getAPInt().urem(Alignment.value()) != 0)
-        return false;
-      Base = NewBase->getValue();
-      bool Overflow = false;
-      AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow);
-      if (Overflow)
-        return false;
-    }
-  }
+  // Try to get the access size.
+  const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
+  APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);
 
-  if (!Base)
+  // If the (max) pointer difference is > 32 bits then it's unlikely to be
+  // dereferenceable.
+  if (MaxPtrDiff.getActiveBits() > 32)
     return false;
 
-  // For the moment, restrict ourselves to the case where the access size is a
-  // multiple of the requested alignment and the base is aligned.
-  // TODO: generalize if a case found which warrants
-  if (EltSize.urem(Alignment.value()) != 0)
+  Value *Base = nullptr;
+  APInt AccessSize;
+  if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
+    Base = NewBase->getValue();
+    AccessSize = MaxPtrDiff;
+  } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
+    if (MinAdd->getNumOperands() != 2)
+      return false;
+
+    const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
+    const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
+    if (!Offset || !NewBase)
+      return false;
+
+    // The following code below assumes the offset is unsigned, but GEP
+    // offsets are treated as signed so we can end up with a signed value
+    // here too. For example, suppose the initial PHI value is (i8 255),
+    // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
+    if (Offset->getAPInt().isNegative())
+      return false;
+
+    // For the moment, restrict ourselves to the case where the offset is a
+    // multiple of the requested alignment and the base is aligned.
+    // TODO: generalize if a case found which warrants
+    if (Offset->getAPInt().urem(Alignment.value()) != 0)
+      return false;
+
+    AccessSize = MaxPtrDiff + Offset->getAPInt();
+    Base = NewBase->getValue();
+  } else
     return false;
+
+  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
   return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
                                             HeaderFirstNonPHI, AC, &DT);
 }
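To make the new computation concrete, here is a worked check with assumed numbers chosen to match the 412-byte figure in the commit message (the actual test loop may differ): an i32 load stepping -4 bytes from base + 408 over 103 iterations.

#include "llvm/ADT/APInt.h"
#include <cassert>

// Assumed numbers: AddRec {base + 408,+,-4}, max backedge-taken count 102.
// getStartAndEndForAccess swaps the bounds for a negative step, giving
// AccessStart = base + 408 - 102*4 = base and AccessEnd = base + 408 + 4,
// so MaxPtrDiff is 412 bytes -- the whole range the reverse loop can touch.
int main() {
  llvm::APInt Step(/*numBits=*/64, /*val=*/-4, /*isSigned=*/true);
  llvm::APInt MaxBECount(64, 102);
  llvm::APInt EltSize(64, 4);
  llvm::APInt AccessSize = MaxBECount * Step.abs() + EltSize;
  assert(AccessSize == 412 && "fits exactly into a 412-byte alloca");
  return 0;
}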

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 26 additions & 22 deletions
@@ -203,29 +203,29 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
 ///
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
-    PredicatedScalarEvolution &PSE,
+std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
+    ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, Type *>,
-             std::pair<const SCEV *, const SCEV *>> &PointerBounds) {
-  ScalarEvolution *SE = PSE.getSE();
-
-  auto [Iter, Ins] = PointerBounds.insert(
-      {{PtrExpr, AccessTy},
-       {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
-  if (!Ins)
-    return Iter->second;
+             std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
+  std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
+  if (PointerBounds) {
+    auto [Iter, Ins] = PointerBounds->insert(
+        {{PtrExpr, AccessTy},
+         {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
+    if (!Ins)
+      return Iter->second;
+    PtrBoundsPair = &Iter->second;
+  }
 
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
   if (SE->isLoopInvariant(PtrExpr, Lp)) {
     ScStart = ScEnd = PtrExpr;
   } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
-    const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount();
-
     ScStart = AR->getStart();
-    ScEnd = AR->evaluateAtIteration(Ex, *SE);
+    ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
     const SCEV *Step = AR->getStepRecurrence(*SE);
 
     // For expressions with negative step, the upper bound is ScStart and the
@@ -244,16 +244,18 @@ static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
     return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
 
   assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
-  assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant");
+  assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
 
   // Add the size of the pointed element to ScEnd.
   auto &DL = Lp->getHeader()->getDataLayout();
   Type *IdxTy = DL.getIndexType(PtrExpr->getType());
   const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
-  Iter->second = {ScStart, ScEnd};
-  return Iter->second;
+  std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
+  if (PointerBounds)
+    *PtrBoundsPair = Res;
+  return Res;
 }
 
 /// Calculate Start and End points of memory access using
@@ -263,8 +265,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                     unsigned DepSetId, unsigned ASId,
                                     PredicatedScalarEvolution &PSE,
                                     bool NeedsFreeze) {
+  const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
   const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
-      Lp, PtrExpr, AccessTy, PSE, DC.getPointerBounds());
+      Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1938,10 +1941,11 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   // required for correctness.
   if (SE.isLoopInvariant(Src, InnermostLoop) ||
       SE.isLoopInvariant(Sink, InnermostLoop)) {
-    const auto &[SrcStart_, SrcEnd_] =
-        getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
-    const auto &[SinkStart_, SinkEnd_] =
-        getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
+    const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
+    const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
+        InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
+    const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
+        InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
     if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
         !isa<SCEVCouldNotCompute>(SrcEnd_) &&
         !isa<SCEVCouldNotCompute>(SinkStart_) &&

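The key design choice in this refactor is visible in the two hunks above: getStartAndEndForAccess now takes a plain ScalarEvolution and an explicit MaxBECount, and the PointerBounds cache became a nullable pointer. LoopAccessAnalysis keeps its memoized, PSE-driven behaviour, while Loads.cpp, which has no PredicatedScalarEvolution, can still call the helper. A side-by-side sketch of the two calling conventions (variables assumed in scope as in the hunks):

// LoopAccessAnalysis call sites: PSE-derived bound, memoization cache kept.
auto [S1, E1] = llvm::getStartAndEndForAccess(
    L, PtrExpr, AccessTy, PSE.getSymbolicMaxBackedgeTakenCount(),
    PSE.getSE(), &PointerBounds);

// Loads.cpp call site: predicated max backedge-taken count, no cache.
auto [S2, E2] = llvm::getStartAndEndForAccess(
    L, PtrExpr, AccessTy,
    SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates),
    &SE, /*PointerBounds=*/nullptr);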