Skip to content

Commit 5af11ec

Browse files
committed
[Attributor] Determine potentially loaded values through memory
We already look through memory to determine where a value that is stored might pop up again (potential copies). This patch introduces the other direction with similar logic. If a value is loaded, we can follow all the accesses to the pointer (or rather, to its underlying object) and try to determine what value might have been stored.
1 parent eb73af4 commit 5af11ec

File tree

13 files changed

+333
-282
lines changed

13 files changed

+333
-282
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,24 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
199199
bool &UsedAssumedInformation,
200200
bool Intraprocedural = false);
201201

202+
/// Collect all potential values \p LI could read into \p PotentialValues. That
203+
/// is, the only values read by \p LI are assumed to be known and all are in
204+
/// \p PotentialValues. Dependences onto \p QueryingAA are properly tracked,
205+
/// \p UsedAssumedInformation will inform the caller if assumed information was
206+
/// used.
207+
///
208+
/// \returns True if the assumed potential values are all in \p PotentialValues,
209+
/// false if something went wrong and the values could not be
210+
/// determined.
211+
bool getPotentiallyLoadedValues(Attributor &A, LoadInst &LI,
212+
SmallSetVector<Value *, 4> &PotentialValues,
213+
const AbstractAttribute &QueryingAA,
214+
bool &UsedAssumedInformation,
215+
bool OnlyExact = false);
216+
202217
/// Collect all potential values of the one stored by \p SI into
203218
/// \p PotentialCopies. That is, the only copies that were made via the
204-
/// store are assumed to be known and all in \p PotentialCopies. Dependences
219+
/// store are assumed to be known and all are in \p PotentialCopies. Dependences
205220
/// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
206221
/// inform the caller if assumed information was used.
207222
///
@@ -210,7 +225,8 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
210225
/// determined.
211226
bool getPotentialCopiesOfStoredValue(
212227
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
213-
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
228+
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
229+
bool OnlyExact = false);
214230

215231
/// Return true if \p IRP is readonly. This will query respective AAs that
216232
/// deduce the information and introduce dependences for \p QueryingAA.

llvm/lib/Transforms/IPO/Attributor.cpp

Lines changed: 78 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -322,30 +322,40 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
322322
return nullptr;
323323
}
324324

325-
bool AA::getPotentialCopiesOfStoredValue(
326-
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
327-
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) {
328-
329-
Value &Ptr = *SI.getPointerOperand();
325+
template <bool IsLoad, typename Ty>
326+
static bool
327+
getPotentialCopiesOfMemoryValue(Attributor &A, Ty &I,
328+
SmallSetVector<Value *, 4> &PotentialCopies,
329+
const AbstractAttribute &QueryingAA,
330+
bool &UsedAssumedInformation, bool OnlyExact) {
331+
LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I
332+
<< " (only exact: " << OnlyExact << ")\n";);
333+
334+
Value &Ptr = *I.getPointerOperand();
330335
SmallVector<Value *, 8> Objects;
331-
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI,
336+
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
332337
UsedAssumedInformation)) {
333338
LLVM_DEBUG(
334339
dbgs() << "Underlying objects stored into could not be determined\n";);
335340
return false;
336341
}
337342

343+
// Containers to remember the pointer infos and new copies while we are not
344+
// sure that we can find all of them. If we abort we want to avoid spurious
345+
// dependences and potential copies in the provided container.
338346
SmallVector<const AAPointerInfo *> PIs;
339347
SmallVector<Value *> NewCopies;
340348

349+
const auto *TLI =
350+
A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction());
341351
for (Value *Obj : Objects) {
342352
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
343353
if (isa<UndefValue>(Obj))
344354
continue;
345355
if (isa<ConstantPointerNull>(Obj)) {
346356
// A null pointer access can be undefined but any offset from null may
347357
// be OK. We do not try to optimize the latter.
348-
if (!NullPointerIsDefined(SI.getFunction(),
358+
if (!NullPointerIsDefined(I.getFunction(),
349359
Ptr.getType()->getPointerAddressSpace()) &&
350360
A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) ==
351361
Obj)
@@ -354,8 +364,9 @@ bool AA::getPotentialCopiesOfStoredValue(
354364
dbgs() << "Underlying object is a valid nullptr, giving up.\n";);
355365
return false;
356366
}
367+
// TODO: Use assumed noalias return.
357368
if (!isa<AllocaInst>(Obj) && !isa<GlobalVariable>(Obj) &&
358-
!isNoAliasCall(Obj)) {
369+
!(IsLoad ? isAllocationFn(Obj, TLI) : isNoAliasCall(Obj))) {
359370
LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj
360371
<< "\n";);
361372
return false;
@@ -368,23 +379,54 @@ bool AA::getPotentialCopiesOfStoredValue(
368379
return false;
369380
}
370381

382+
if (IsLoad) {
383+
Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI);
384+
if (!InitialValue)
385+
return false;
386+
NewCopies.push_back(InitialValue);
387+
}
388+
371389
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
372-
if (!Acc.isRead())
390+
if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead()))
373391
return true;
374-
auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
375-
if (!LI) {
376-
LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
377-
"instruction not supported yet: "
378-
<< *Acc.getRemoteInst() << "\n";);
392+
if (OnlyExact && !IsExact) {
393+
LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst()
394+
<< ", abort!\n");
379395
return false;
380396
}
381-
NewCopies.push_back(LI);
397+
if (IsLoad) {
398+
assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
399+
if (Acc.isWrittenValueYetUndetermined())
400+
return true;
401+
if (!Acc.isWrittenValueUnknown()) {
402+
NewCopies.push_back(Acc.getWrittenValue());
403+
return true;
404+
}
405+
auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst());
406+
if (!SI) {
407+
LLVM_DEBUG(dbgs() << "Underlying object written through a non-store "
408+
"instruction not supported yet: "
409+
<< *Acc.getRemoteInst() << "\n";);
410+
return false;
411+
}
412+
NewCopies.push_back(SI->getValueOperand());
413+
} else {
414+
assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
415+
auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
416+
if (!LI && OnlyExact) {
417+
LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
418+
"instruction not supported yet: "
419+
<< *Acc.getRemoteInst() << "\n";);
420+
return false;
421+
}
422+
NewCopies.push_back(Acc.getRemoteInst());
423+
}
382424
return true;
383425
};
384426

385427
auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj),
386428
DepClassTy::NONE);
387-
if (!PI.forallInterferingAccesses(A, QueryingAA, SI, CheckAccess)) {
429+
if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) {
388430
LLVM_DEBUG(
389431
dbgs()
390432
<< "Failed to verify all interfering accesses for underlying object: "
@@ -394,6 +436,9 @@ bool AA::getPotentialCopiesOfStoredValue(
394436
PIs.push_back(&PI);
395437
}
396438

439+
// Only if we were successful in collecting all potential copies do we record
440+
// dependences (on non-fix AAPointerInfo AAs). We also only then modify the
441+
// given PotentialCopies container.
397442
for (auto *PI : PIs) {
398443
if (!PI->getState().isAtFixpoint())
399444
UsedAssumedInformation = true;
@@ -404,6 +449,23 @@ bool AA::getPotentialCopiesOfStoredValue(
404449
return true;
405450
}
406451

452+
bool AA::getPotentiallyLoadedValues(Attributor &A, LoadInst &LI,
453+
SmallSetVector<Value *, 4> &PotentialValues,
454+
const AbstractAttribute &QueryingAA,
455+
bool &UsedAssumedInformation,
456+
bool OnlyExact) {
457+
return getPotentialCopiesOfMemoryValue</* IsLoad */ true>(
458+
A, LI, PotentialValues, QueryingAA, UsedAssumedInformation, OnlyExact);
459+
}
460+
461+
bool AA::getPotentialCopiesOfStoredValue(
462+
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
463+
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
464+
bool OnlyExact) {
465+
return getPotentialCopiesOfMemoryValue</* IsLoad */ false>(
466+
A, SI, PotentialCopies, QueryingAA, UsedAssumedInformation, OnlyExact);
467+
}
468+
407469
static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
408470
const AbstractAttribute &QueryingAA,
409471
bool RequireReadNone, bool &IsKnown) {

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,31 @@ static bool genericValueTraversal(
400400
}
401401
}
402402

403+
if (auto *LI = dyn_cast<LoadInst>(V)) {
404+
bool UsedAssumedInformation = false;
405+
SmallSetVector<Value *, 4> PotentialCopies;
406+
if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, QueryingAA,
407+
UsedAssumedInformation,
408+
/* OnlyExact */ true)) {
409+
// Values have to be dynamically unique or we lose the fact that a
410+
// single llvm::Value might represent two runtime values (e.g., stack
411+
// locations in different recursive calls).
412+
bool DynamicallyUnique =
413+
llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) {
414+
return AA::isDynamicallyUnique(A, QueryingAA, *PC);
415+
});
416+
if (DynamicallyUnique &&
417+
(!Intraprocedural || !CtxI ||
418+
llvm::all_of(PotentialCopies, [CtxI](Value *PC) {
419+
return AA::isValidInScope(*PC, CtxI->getFunction());
420+
}))) {
421+
for (auto *PotentialCopy : PotentialCopies)
422+
Worklist.push_back({PotentialCopy, CtxI});
423+
continue;
424+
}
425+
}
426+
}
427+
403428
// Once a leaf is reached we inform the user through the callback.
404429
if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
405430
LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "

llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ define internal i32 @deref(i32* %x) nounwind {
2828
; IS__CGSCC_NPM-NEXT: [[X_PRIV:%.*]] = alloca i32, align 4
2929
; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]], align 4
3030
; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_PRIV]], align 4
31-
; IS__CGSCC_NPM-NEXT: ret i32 [[TMP0]]
31+
; IS__CGSCC_NPM-NEXT: ret i32 [[TMP2]]
3232
;
3333
entry:
3434
%tmp2 = load i32, i32* %x, align 4

llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -104,25 +104,15 @@ define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %
104104
; IS________OPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
105105
; IS________OPM-NEXT: ret void
106106
;
107-
; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
108-
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@promote_avx2
109-
; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
110-
; IS__TUNIT_NPM-NEXT: bb:
111-
; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32
112-
; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32
113-
; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
114-
; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
115-
; IS__TUNIT_NPM-NEXT: ret void
116-
;
117-
; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
118-
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@promote_avx2
119-
; IS__CGSCC_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
120-
; IS__CGSCC_NPM-NEXT: bb:
121-
; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32
122-
; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32
123-
; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
124-
; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 32
125-
; IS__CGSCC_NPM-NEXT: ret void
107+
; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable
108+
; IS________NPM-LABEL: define {{[^@]+}}@promote_avx2
109+
; IS________NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
110+
; IS________NPM-NEXT: bb:
111+
; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32
112+
; IS________NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32
113+
; IS________NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
114+
; IS________NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
115+
; IS________NPM-NEXT: ret void
126116
;
127117
bb:
128118
%tmp = load <4 x i64>, <4 x i64>* %arg1
@@ -181,8 +171,7 @@ define void @promote(<4 x i64>* %arg) #0 {
181171
; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]]
182172
; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32
183173
; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]]
184-
; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
185-
; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
174+
; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 2
186175
; IS__CGSCC_NPM-NEXT: ret void
187176
;
188177
bb:

0 commit comments

Comments
 (0)