Skip to content

[pred-deadalloc-elim] Teach the pass how to eliminate dead allocation… #25167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 142 additions & 69 deletions lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
using namespace swift;

STATISTIC(NumLoadPromoted, "Number of loads promoted");
STATISTIC(NumLoadTakePromoted, "Number of load takes promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");

Expand Down Expand Up @@ -1320,14 +1321,18 @@ class AllocOptimize {
DataflowContext(TheMemory, NumMemorySubElements, uses) {}

bool optimizeMemoryAccesses();

/// If the allocation is an autogenerated allocation that is only stored to
/// (after load promotion) then remove it completely.
bool tryToRemoveDeadAllocation();

private:
bool promoteLoad(SILInstruction *Inst);
bool promoteLoadCopy(SILInstruction *Inst);
void promoteLoadTake(LoadInst *Inst, MutableArrayRef<AvailableValue> values);
void promoteDestroyAddr(DestroyAddrInst *dai,
MutableArrayRef<AvailableValue> values);
bool canPromoteDestroyAddr(DestroyAddrInst *dai,
SmallVectorImpl<AvailableValue> &availableValues);
bool canPromoteTake(SILInstruction *i,
SmallVectorImpl<AvailableValue> &availableValues);
};

} // end anonymous namespace
Expand Down Expand Up @@ -1361,7 +1366,7 @@ static SILValue tryFindSrcAddrForLoad(SILInstruction *i) {
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
bool AllocOptimize::promoteLoadCopy(SILInstruction *Inst) {
// Note that we intentionally don't support forwarding of weak pointers,
// because the underlying value may be deallocated at any time. We would
// have to prove that something in this function is holding the weak value
Expand Down Expand Up @@ -1464,19 +1469,19 @@ bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
}

/// Return true if we can promote the given destroy.
bool AllocOptimize::canPromoteDestroyAddr(
DestroyAddrInst *dai, SmallVectorImpl<AvailableValue> &availableValues) {
SILValue address = dai->getOperand();
bool AllocOptimize::canPromoteTake(
SILInstruction *inst, SmallVectorImpl<AvailableValue> &availableValues) {
SILValue address = inst->getOperand(0);

// We cannot promote destroys of address-only types, because we can't expose
// the load.
SILType loadTy = address->getType().getObjectType();
if (loadTy.isAddressOnly(*dai->getFunction()))
if (loadTy.isAddressOnly(*inst->getFunction()))
return false;

// If the box has escaped at this instruction, we can't safely promote the
// load.
if (DataflowContext.hasEscapedAt(dai))
if (DataflowContext.hasEscapedAt(inst))
return false;

// Compute the access path down to the field so we can determine precise
Expand All @@ -1498,15 +1503,15 @@ bool AllocOptimize::canPromoteDestroyAddr(
// return false. We have nothing further to do.
SmallVector<AvailableValue, 8> tmpList;
tmpList.resize(NumMemorySubElements);
if (!DataflowContext.computeAvailableValues(dai, firstElt, numLoadSubElements,
requiredElts, tmpList))
if (!DataflowContext.computeAvailableValues(
inst, firstElt, numLoadSubElements, requiredElts, tmpList))
return false;

// Now check that we can perform a take upon our available values. This
// implies today that our value is fully available. If the value is not fully
// available, we would need to split stores to promote this destroy_addr. We
// do not support that yet.
AvailableValueAggregator agg(dai, tmpList, Uses, deadEndBlocks,
AvailableValueAggregator agg(inst, tmpList, Uses, deadEndBlocks,
true /*isTake*/);
if (!agg.canTake(loadTy, firstElt))
return false;
Expand Down Expand Up @@ -1551,29 +1556,56 @@ void AllocOptimize::promoteDestroyAddr(
dai->eraseFromParent();
}

/// Replace a `load [take]` with a value assembled from \p availableValues and
/// delete the load.
///
/// \param li the load to promote; it must carry the [take] ownership
///        qualifier (asserted below).
/// \param availableValues the per-subelement available values to aggregate
///        into the replacement value.
void AllocOptimize::promoteLoadTake(
    LoadInst *li, MutableArrayRef<AvailableValue> availableValues) {
  assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
         "load [copy], load [trivial], load should be handled by "
         "promoteLoadCopy");

  SILValue srcAddr = li->getOperand();
  SILType valueTy = srcAddr->getType().getObjectType();

  // Work out which subelement of the allocation the load's address refers to
  // so that def/use information can be looked up precisely.
  unsigned firstSubElt = computeSubelement(srcAddr, TheMemory);

  // Combine the subelement values into a single value of the same type the
  // load produced, emitting smaller loads for any subelements that were not
  // available.
  AvailableValueAggregator aggregator(li, availableValues, Uses, deadEndBlocks,
                                      true /*isTake*/);
  SILValue replacement =
      aggregator.aggregateValues(valueTy, srcAddr, firstSubElt);

  LLVM_DEBUG(llvm::dbgs() << " *** Promoting load_take: " << *li << "\n"
                          << " To value: " << *replacement << "\n");

  ++NumLoadTakePromoted;

  // Finally, forward the new value to every user of the load and drop the
  // now-dead load.
  li->replaceAllUsesWith(replacement);
  li->eraseFromParent();
}

namespace {

struct DestroyAddrPromotionState {
ArrayRef<SILInstruction *> destroys;
SmallVector<unsigned, 8> destroyAddrIndices;
struct TakePromotionState {
ArrayRef<SILInstruction *> takeInsts;
SmallVector<unsigned, 8> takeInstIndices;
SmallVector<AvailableValue, 32> availableValueList;
SmallVector<unsigned, 8> availableValueStartOffsets;

DestroyAddrPromotionState(ArrayRef<SILInstruction *> destroys)
: destroys(destroys) {}
TakePromotionState(ArrayRef<SILInstruction *> takeInsts)
: takeInsts(takeInsts) {}

unsigned size() const {
return destroyAddrIndices.size();
}
unsigned size() const { return takeInstIndices.size(); }

void initializeForDestroyAddr(unsigned destroyAddrIndex) {
void initializeForTakeInst(unsigned takeInstIndex) {
availableValueStartOffsets.push_back(availableValueList.size());
destroyAddrIndices.push_back(destroyAddrIndex);
takeInstIndices.push_back(takeInstIndex);
}

std::pair<DestroyAddrInst *, MutableArrayRef<AvailableValue>>
std::pair<SILInstruction *, MutableArrayRef<AvailableValue>>
getData(unsigned index) {
unsigned destroyAddrIndex = destroyAddrIndices[index];
unsigned takeInstIndex = takeInstIndices[index];
unsigned startOffset = availableValueStartOffsets[index];
unsigned count;

Expand All @@ -1585,36 +1617,21 @@ struct DestroyAddrPromotionState {

MutableArrayRef<AvailableValue> values(&availableValueList[startOffset],
count);
auto *dai = cast<DestroyAddrInst>(destroys[destroyAddrIndex]);
return {dai, values};
return {takeInsts[takeInstIndex], values};
}
};

} // end anonymous namespace

/// If the allocation is an autogenerated allocation that is only stored to
/// (after load promotion) then remove it completely.
bool AllocOptimize::tryToRemoveDeadAllocation() {
assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
"Unhandled allocation case");

auto *f = TheMemory->getFunction();

// We don't want to remove allocations that are required for useful debug
// information at -O0. As such, we only remove allocations if:
//
// 1. They are in a transparent function.
// 2. They are in a normal function, but didn't come from a VarDecl, or came
// from one that was autogenerated or inlined from a transparent function.
SILLocation loc = TheMemory->getLoc();
if (!f->isTransparent() &&
loc.getAsASTNode<VarDecl>() && !loc.isAutoGenerated() &&
!loc.is<MandatoryInlinedLocation>())
return false;

// Check the uses list to see if there are any non-store uses left over after
// load promotion and other things PMO does.
for (auto &u : Uses) {
// Check if our use list has any non store, non take uses that keep the value
// alive. Returns nullptr on success and the user that prevents removal on
// failure.
//
// NOTE: This also gathers up any takes that we need to process.
static SILInstruction *
checkForNonStoreNonTakeUses(ArrayRef<PMOMemoryUse> uses,
SmallVectorImpl<SILInstruction *> &loadTakeList) {
for (auto &u : uses) {
// Ignore removed instructions.
if (u.Inst == nullptr)
continue;
Expand All @@ -1623,33 +1640,73 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
case PMOUseKind::Assign:
// Until we can promote the value being destroyed by the assign, we cannot
// remove allocations with such assigns.
return false;
return u.Inst;
case PMOUseKind::InitOrAssign:
break; // These don't prevent removal.
continue; // These don't prevent removal.
case PMOUseKind::Load:
// For now only handle takes from alloc_stack.
//
// TODO: It should be implementable, but it has not been needed yet.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is Box vs. Stack relevant to this optimization? If it's all the same, don't distinguish them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't. I just didn't need it and was avoiding writing some more tests. You are right though it is the /right/ thing to do.

if (auto *li = dyn_cast<LoadInst>(u.Inst)) {
if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
loadTakeList.push_back(li);
continue;
}
}
return u.Inst;
case PMOUseKind::Initialization:
if (!isa<ApplyInst>(u.Inst) &&
// A copy_addr that is not a take affects the retain count
// of the source.
(!isa<CopyAddrInst>(u.Inst) ||
cast<CopyAddrInst>(u.Inst)->isTakeOfSrc()))
break;
continue;
// FALL THROUGH.
LLVM_FALLTHROUGH;
case PMOUseKind::Load:
LLVM_FALLTHROUGH;
case PMOUseKind::IndirectIn:
case PMOUseKind::InOutUse:
case PMOUseKind::Escape:
LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
"kept alive by: "
<< *u.Inst);
return false; // These do prevent removal.
return u.Inst; // These do prevent removal.
}
}

return nullptr;
}

/// Returns true if \p TheMemory may be deleted without losing debug
/// information that is useful at -O0.
///
/// An allocation is considered removable when at least one of these holds:
///
/// 1. It is in a transparent function.
/// 2. Its location did not come from a VarDecl.
/// 3. Its location is autogenerated.
/// 4. Its location was inlined from a transparent function.
static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
  // Anything inside a transparent function is fair game.
  if (TheMemory->getFunction()->isTransparent())
    return true;

  SILLocation loc = TheMemory->getLoc();

  // Not backed by a user-visible variable declaration.
  if (!loc.getAsASTNode<VarDecl>())
    return true;

  // Backed by a VarDecl, but one the compiler synthesized.
  if (loc.isAutoGenerated())
    return true;

  // Otherwise only removable if it arrived via mandatory inlining.
  return loc.is<MandatoryInlinedLocation>();
}

bool AllocOptimize::tryToRemoveDeadAllocation() {
assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
"Unhandled allocation case");

if (!isRemovableAutogeneratedAllocation(TheMemory))
return false;

SmallVector<SILInstruction *, 8> loadTakeList;
// Check the uses list to see if there are any non-store uses left over after
// load promotion and other things PMO does.
if (auto *badUser = checkForNonStoreNonTakeUses(Uses, loadTakeList)) {
LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
"kept alive by: "
<< *badUser);
return false;
}

// If our memory is trivially typed, we can just remove it without needing to
// consider if the stored value needs to be destroyed. So at this point,
// delete the memory!
if (MemoryType.isTrivial(*f)) {
if (MemoryType.isTrivial(*TheMemory->getFunction())) {
LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated trivial allocation: "
<< *TheMemory);

Expand All @@ -1661,23 +1718,31 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
return true;
}

// Now make sure we can promote all load [take] and prepare state for each of
// them.
TakePromotionState loadTakeState(loadTakeList);
for (auto p : llvm::enumerate(loadTakeList)) {
loadTakeState.initializeForTakeInst(p.index());
if (!canPromoteTake(p.value(), loadTakeState.availableValueList))
return false;
}

// Otherwise removing the allocation will drop any releases. Check that
// there is nothing preventing removal.
DestroyAddrPromotionState state(Releases);

TakePromotionState destroyAddrState(Releases);
for (auto p : llvm::enumerate(Releases)) {
auto *r = p.value();
if (r == nullptr)
continue;

// We stash all of the destroy_addr that we see.
if (auto *dai = dyn_cast<DestroyAddrInst>(r)) {
state.initializeForDestroyAddr(p.index() /*destroyAddrIndex*/);
destroyAddrState.initializeForTakeInst(p.index() /*destroyAddrIndex*/);
// Make sure we can actually promote this destroy addr. If we can not,
// then we must bail. In order to not gather available values twice, we
// gather the available values here that we will use to promote the
// values.
if (!canPromoteDestroyAddr(dai, state.availableValueList))
if (!canPromoteTake(dai, destroyAddrState.availableValueList))
return false;
continue;
}
Expand All @@ -1689,14 +1754,22 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
return false;
}

// If we reached this point, we can promote all of our destroy_addr.
for (unsigned i : range(state.size())) {
DestroyAddrInst *dai;
// If we reached this point, we can promote all of our destroy_addr and load
// take. Since our load [take] may be available values for our destroy_addr,
// we promote the destroy_addr first.
for (unsigned i : range(destroyAddrState.size())) {
SILInstruction *dai;
MutableArrayRef<AvailableValue> values;
std::tie(dai, values) = state.getData(i);
promoteDestroyAddr(dai, values);
std::tie(dai, values) = destroyAddrState.getData(i);
promoteDestroyAddr(cast<DestroyAddrInst>(dai), values);
// We do not need to unset releases, since we are going to exit here.
}
for (unsigned i : range(loadTakeState.size())) {
SILInstruction *li;
MutableArrayRef<AvailableValue> values;
std::tie(li, values) = loadTakeState.getData(i);
promoteLoadTake(cast<LoadInst>(li), values);
}

LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
<< *TheMemory);
Expand All @@ -1719,7 +1792,7 @@ bool AllocOptimize::optimizeMemoryAccesses() {
auto &use = Uses[i];
// Ignore entries for instructions that got expanded along the way.
if (use.Inst && use.Kind == PMOUseKind::Load) {
if (promoteLoad(use.Inst)) {
if (promoteLoadCopy(use.Inst)) {
Uses[i].Inst = nullptr; // remove entry if load got deleted.
changed = true;
}
Expand Down
Loading