Skip to content

Commit b0cd022

Browse files
committed
Revert "[Attributor] Keep track of reached returns in AAPointerInfo (llvm#107479)"
makes 532.exa_sph 20x slower This reverts commit 56a0334. Change-Id: I796a33f51ce312ca550a5c48c0391de05be302a7
1 parent 7d2f310 commit b0cd022

File tree

6 files changed

+47
-57
lines changed

6 files changed

+47
-57
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6119,7 +6119,6 @@ struct AAPointerInfo : public AbstractAttribute {
61196119
virtual const_bin_iterator begin() const = 0;
61206120
virtual const_bin_iterator end() const = 0;
61216121
virtual int64_t numOffsetBins() const = 0;
6122-
virtual bool reachesReturn() const = 0;
61236122

61246123
/// Call \p CB on all accesses that might interfere with \p Range and return
61256124
/// true if all such accesses were known and the callback returned true for

llvm/lib/Transforms/IPO/Attributor.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,6 +1852,22 @@ bool Attributor::checkForAllUses(
18521852

18531853
User &Usr = *U->getUser();
18541854
AddUsers(Usr, /* OldUse */ nullptr);
1855+
1856+
auto *RI = dyn_cast<ReturnInst>(&Usr);
1857+
if (!RI)
1858+
continue;
1859+
1860+
Function &F = *RI->getFunction();
1861+
auto CallSitePred = [&](AbstractCallSite ACS) {
1862+
return AddUsers(*ACS.getInstruction(), U);
1863+
};
1864+
if (!checkForAllCallSites(CallSitePred, F, /* RequireAllCallSites */ true,
1865+
&QueryingAA, UsedAssumedInformation)) {
1866+
LLVM_DEBUG(dbgs() << "[Attributor] Could not follow return instruction "
1867+
"to all call sites: "
1868+
<< *RI << "\n");
1869+
return false;
1870+
}
18551871
}
18561872

18571873
return true;

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 10 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -827,7 +827,6 @@ struct AA::PointerInfo::State : public AbstractState {
827827
AccessList = R.AccessList;
828828
OffsetBins = R.OffsetBins;
829829
RemoteIMap = R.RemoteIMap;
830-
ReachesReturn = R.ReachesReturn;
831830
return *this;
832831
}
833832

@@ -838,7 +837,6 @@ struct AA::PointerInfo::State : public AbstractState {
838837
std::swap(AccessList, R.AccessList);
839838
std::swap(OffsetBins, R.OffsetBins);
840839
std::swap(RemoteIMap, R.RemoteIMap);
841-
std::swap(ReachesReturn, R.ReachesReturn);
842840
return *this;
843841
}
844842

@@ -880,16 +878,11 @@ struct AA::PointerInfo::State : public AbstractState {
880878
AAPointerInfo::OffsetBinsTy OffsetBins;
881879
DenseMap<const Instruction *, SmallVector<unsigned>> RemoteIMap;
882880

883-
/// Flag to determine if the underlying pointer is reaching a return statement
884-
/// in the associated function or not. Returns in other functions cause
885-
/// invalidation.
886-
bool ReachesReturn = false;
887-
888881
/// See AAPointerInfo::forallInterferingAccesses.
889882
bool forallInterferingAccesses(
890883
AA::RangeTy Range,
891884
function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
892-
if (!isValidState() || ReachesReturn)
885+
if (!isValidState())
893886
return false;
894887

895888
for (const auto &It : OffsetBins) {
@@ -911,7 +904,7 @@ struct AA::PointerInfo::State : public AbstractState {
911904
Instruction &I,
912905
function_ref<bool(const AAPointerInfo::Access &, bool)> CB,
913906
AA::RangeTy &Range) const {
914-
if (!isValidState() || ReachesReturn)
907+
if (!isValidState())
915908
return false;
916909

917910
auto LocalList = RemoteIMap.find(&I);
@@ -1078,8 +1071,7 @@ struct AAPointerInfoImpl
10781071
return std::string("PointerInfo ") +
10791072
(isValidState() ? (std::string("#") +
10801073
std::to_string(OffsetBins.size()) + " bins")
1081-
: "<invalid>") +
1082-
(ReachesReturn ? " (returned)" : "");
1074+
: "<invalid>");
10831075
}
10841076

10851077
/// See AbstractAttribute::manifest(...).
@@ -1092,7 +1084,6 @@ struct AAPointerInfoImpl
10921084
virtual int64_t numOffsetBins() const override {
10931085
return State::numOffsetBins();
10941086
}
1095-
virtual bool reachesReturn() const override { return ReachesReturn; }
10961087

10971088
bool forallInterferingAccesses(
10981089
AA::RangeTy Range,
@@ -1382,7 +1373,6 @@ struct AAPointerInfoImpl
13821373

13831374
const auto &OtherAAImpl = static_cast<const AAPointerInfoImpl &>(OtherAA);
13841375
bool IsByval = OtherAAImpl.getAssociatedArgument()->hasByValAttr();
1385-
ReachesReturn = OtherAAImpl.ReachesReturn;
13861376

13871377
// Combine the accesses bin by bin.
13881378
ChangeStatus Changed = ChangeStatus::UNCHANGED;
@@ -1676,13 +1666,8 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
16761666
}
16771667
if (isa<PtrToIntInst>(Usr))
16781668
return false;
1679-
if (isa<CastInst>(Usr) || isa<SelectInst>(Usr))
1669+
if (isa<CastInst>(Usr) || isa<SelectInst>(Usr) || isa<ReturnInst>(Usr))
16801670
return HandlePassthroughUser(Usr, CurPtr, Follow);
1681-
// Returns are allowed if they are in the associated functions. Users can
1682-
// then check the call site return. Returns from other functions can't be
1683-
// tracked and are cause for invalidation.
1684-
if (auto *RI = dyn_cast<ReturnInst>(Usr))
1685-
return ReachesReturn = RI->getFunction() == getAssociatedFunction();
16861671

16871672
// For PHIs we need to take care of the recurrence explicitly as the value
16881673
// might change while we iterate through a loop. For now, we give up if
@@ -1913,37 +1898,15 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
19131898
DepClassTy::REQUIRED);
19141899
if (!CSArgPI)
19151900
return false;
1916-
bool IsArgMustAcc = (getUnderlyingObject(CurPtr) == &AssociatedValue);
1901+
bool IsMustAcc = (getUnderlyingObject(CurPtr) == &AssociatedValue);
19171902
Changed = translateAndAddState(A, *CSArgPI, OffsetInfoMap[CurPtr], *CB,
1918-
IsArgMustAcc) |
1919-
Changed;
1920-
if (!CSArgPI->reachesReturn())
1921-
return isValidState();
1922-
1923-
Function *Callee = CB->getCalledFunction();
1924-
if (!Callee || Callee->arg_size() <= ArgNo)
1925-
return false;
1926-
bool UsedAssumedInformation = false;
1927-
auto ReturnedValue = A.getAssumedSimplified(
1928-
IRPosition::returned(*Callee), *this, UsedAssumedInformation,
1929-
AA::ValueScope::Intraprocedural);
1930-
auto *ReturnedArg =
1931-
dyn_cast_or_null<Argument>(ReturnedValue.value_or(nullptr));
1932-
auto *Arg = Callee->getArg(ArgNo);
1933-
if (ReturnedArg && Arg != ReturnedArg)
1934-
return true;
1935-
bool IsRetMustAcc = IsArgMustAcc && (ReturnedArg == Arg);
1936-
const auto *CSRetPI = A.getAAFor<AAPointerInfo>(
1937-
*this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
1938-
if (!CSRetPI)
1939-
return false;
1940-
Changed = translateAndAddState(A, *CSRetPI, OffsetInfoMap[CurPtr], *CB,
1941-
IsRetMustAcc) |
1903+
IsMustAcc) |
19421904
Changed;
19431905
return isValidState();
19441906
}
19451907
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB
19461908
<< "\n");
1909+
// TODO: Allow some call uses
19471910
return false;
19481911
}
19491912

@@ -2379,10 +2342,8 @@ struct AANoFreeFloating : AANoFreeImpl {
23792342
Follow = true;
23802343
return true;
23812344
}
2382-
if (isa<StoreInst>(UserI) || isa<LoadInst>(UserI))
2383-
return true;
2384-
2385-
if (isa<ReturnInst>(UserI) && getIRPosition().isArgumentPosition())
2345+
if (isa<StoreInst>(UserI) || isa<LoadInst>(UserI) ||
2346+
isa<ReturnInst>(UserI))
23862347
return true;
23872348

23882349
// Unknown user.
@@ -12789,7 +12750,7 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
1278912750
if (!PI)
1279012751
return indicatePessimisticFixpoint();
1279112752

12792-
if (!PI->getState().isValidState() || PI->reachesReturn())
12753+
if (!PI->getState().isValidState())
1279312754
return indicatePessimisticFixpoint();
1279412755

1279512756
const DataLayout &DL = A.getDataLayout();

llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3434
define dso_local i32 @main() {
3535
; TUNIT-LABEL: define {{[^@]+}}@main() {
3636
; TUNIT-NEXT: entry:
37-
; TUNIT-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8
38-
; TUNIT-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8
37+
; TUNIT-NEXT: [[ALLOC11:%.*]] = alloca i8, i32 0, align 8
38+
; TUNIT-NEXT: [[ALLOC22:%.*]] = alloca i8, i32 0, align 8
3939
; TUNIT-NEXT: [[THREAD:%.*]] = alloca i64, align 8
4040
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @foo, ptr nofree readnone align 4294967296 undef)
4141
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @bar, ptr noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef)
42-
; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @baz, ptr noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
43-
; TUNIT-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @buz, ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]])
42+
; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @baz, ptr noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC11]])
43+
; TUNIT-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @buz, ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC22]])
4444
; TUNIT-NEXT: ret i32 0
4545
;
4646
; CGSCC-LABEL: define {{[^@]+}}@main() {

llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3185,7 +3185,10 @@ define i32 @may_access_after_return(i32 noundef %N, i32 noundef %M) {
31853185
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
31863186
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
31873187
; TUNIT-NEXT: call void @write_both(ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]], ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR18]]
3188-
; TUNIT-NEXT: ret i32 8
3188+
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
3189+
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4
3190+
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
3191+
; TUNIT-NEXT: ret i32 [[ADD]]
31893192
;
31903193
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
31913194
; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return
@@ -3301,7 +3304,10 @@ define i32 @may_access_after_return_no_choice1(i32 noundef %N, i32 noundef %M) {
33013304
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
33023305
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
33033306
; TUNIT-NEXT: call void @write_both(ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]], ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR18]]
3304-
; TUNIT-NEXT: ret i32 8
3307+
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
3308+
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4
3309+
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
3310+
; TUNIT-NEXT: ret i32 [[ADD]]
33053311
;
33063312
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
33073313
; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice1
@@ -3336,7 +3342,10 @@ define i32 @may_access_after_return_no_choice2(i32 noundef %N, i32 noundef %M) {
33363342
; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4
33373343
; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4
33383344
; TUNIT-NEXT: call void @write_both(ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]], ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) #[[ATTR18]]
3339-
; TUNIT-NEXT: ret i32 8
3345+
; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4
3346+
; TUNIT-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4
3347+
; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
3348+
; TUNIT-NEXT: ret i32 [[ADD]]
33403349
;
33413350
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
33423351
; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice2

revert_patches.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,8 @@ Revert: breaks hipBlender*
9090
0e73bbd3450c [AMDGPU][PromoteAlloca] Don't stop when an alloca is too big to promote (#93466)
9191
contact: Pierre
9292
---
93+
revert : breaks 532.exa_sph: 20x slower
94+
56a033462ed2
95+
[Attributor] Keep track of reached returns in AAPointerInfo (#107479)
96+
RonL
97+
---

0 commit comments

Comments
 (0)