Skip to content

Commit ada2f9c

Browse files
committed
[pred-deadalloc-elim] Teach the pass how to eliminate dead allocations whose value is taken out with a load [take] and then stored back into memory with a store [init].
I am doing this to eliminate some differences in codegen before/after serialization ownership. It just means fewer tests need to be touched when I flip the switch.

Specifically, this change allows us to handle certain cases where a dead allocation is being used to pass around a value at +1 by performing a load [take] and then storing a value back into the memory. The general format is an allocation that only has store, load [take], and destroy_addr users. Consider the following SIL:

```
store %x to [init] %mem                (0)
%xhat = load [take] %mem               (1)
%xhat_cast = apply %f(%xhat)           (2)
store %xhat_cast to [init] %mem        (3)
destroy_addr %mem
```

Notice how, assuming that we can get rid of the store (0), we can perform the following store -> load forwarding:

```
%xhat_cast = apply %f(%x)              (2)
store %xhat_cast to [init] %mem        (3)
destroy_addr %mem
```

In contrast, notice how we get an ownership violation (double consume of %x by (0) and (2)) if we cannot get rid of the store:

```
store %x to [init] %mem                (0)
%xhat_cast = apply %f(%x)              (2)
store %xhat_cast to [init] %mem        (3)
destroy_addr %mem
```

This is in fact the same condition as for promoting a destroy_addr, since a destroy_addr is equivalent to a load [take] + destroy_value. So I was able to generalize the code for destroy_addr to handle this case.
1 parent e9d4687 commit ada2f9c

File tree

3 files changed

+295
-69
lines changed

3 files changed

+295
-69
lines changed

lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp

Lines changed: 142 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
using namespace swift;
3030

3131
STATISTIC(NumLoadPromoted, "Number of loads promoted");
32+
STATISTIC(NumLoadTakePromoted, "Number of load takes promoted");
3233
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
3334
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");
3435

@@ -1320,14 +1321,18 @@ class AllocOptimize {
13201321
DataflowContext(TheMemory, NumMemorySubElements, uses) {}
13211322

13221323
bool optimizeMemoryAccesses();
1324+
1325+
/// If the allocation is an autogenerated allocation that is only stored to
1326+
/// (after load promotion) then remove it completely.
13231327
bool tryToRemoveDeadAllocation();
13241328

13251329
private:
1326-
bool promoteLoad(SILInstruction *Inst);
1330+
bool promoteLoadCopy(SILInstruction *Inst);
1331+
void promoteLoadTake(LoadInst *Inst, MutableArrayRef<AvailableValue> values);
13271332
void promoteDestroyAddr(DestroyAddrInst *dai,
13281333
MutableArrayRef<AvailableValue> values);
1329-
bool canPromoteDestroyAddr(DestroyAddrInst *dai,
1330-
SmallVectorImpl<AvailableValue> &availableValues);
1334+
bool canPromoteTake(SILInstruction *i,
1335+
SmallVectorImpl<AvailableValue> &availableValues);
13311336
};
13321337

13331338
} // end anonymous namespace
@@ -1361,7 +1366,7 @@ static SILValue tryFindSrcAddrForLoad(SILInstruction *i) {
13611366
/// cross element accesses have been scalarized.
13621367
///
13631368
/// This returns true if the load has been removed from the program.
1364-
bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
1369+
bool AllocOptimize::promoteLoadCopy(SILInstruction *Inst) {
13651370
// Note that we intentionally don't support forwarding of weak pointers,
13661371
// because the underlying value may be deallocated at any time. We would
13671372
// have to prove that something in this function is holding the weak value
@@ -1464,19 +1469,19 @@ bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
14641469
}
14651470

14661471
/// Return true if we can promote the given take (a destroy_addr or a load
/// [take]).
1467-
bool AllocOptimize::canPromoteDestroyAddr(
1468-
DestroyAddrInst *dai, SmallVectorImpl<AvailableValue> &availableValues) {
1469-
SILValue address = dai->getOperand();
1472+
bool AllocOptimize::canPromoteTake(
1473+
SILInstruction *inst, SmallVectorImpl<AvailableValue> &availableValues) {
1474+
SILValue address = inst->getOperand(0);
14701475

14711476
// We cannot promote destroys of address-only types, because we can't expose
14721477
// the load.
14731478
SILType loadTy = address->getType().getObjectType();
1474-
if (loadTy.isAddressOnly(*dai->getFunction()))
1479+
if (loadTy.isAddressOnly(*inst->getFunction()))
14751480
return false;
14761481

14771482
// If the box has escaped at this instruction, we can't safely promote the
14781483
// load.
1479-
if (DataflowContext.hasEscapedAt(dai))
1484+
if (DataflowContext.hasEscapedAt(inst))
14801485
return false;
14811486

14821487
// Compute the access path down to the field so we can determine precise
@@ -1498,15 +1503,15 @@ bool AllocOptimize::canPromoteDestroyAddr(
14981503
// return false. We have nothing further to do.
14991504
SmallVector<AvailableValue, 8> tmpList;
15001505
tmpList.resize(NumMemorySubElements);
1501-
if (!DataflowContext.computeAvailableValues(dai, firstElt, numLoadSubElements,
1502-
requiredElts, tmpList))
1506+
if (!DataflowContext.computeAvailableValues(
1507+
inst, firstElt, numLoadSubElements, requiredElts, tmpList))
15031508
return false;
15041509

15051510
// Now check that we can perform a take upon our available values. This
15061511
// implies today that our value is fully available. If the value is not fully
15071512
// available, we would need to split stores to promote this destroy_addr. We
15081513
// do not support that yet.
1509-
AvailableValueAggregator agg(dai, tmpList, Uses, deadEndBlocks,
1514+
AvailableValueAggregator agg(inst, tmpList, Uses, deadEndBlocks,
15101515
true /*isTake*/);
15111516
if (!agg.canTake(loadTy, firstElt))
15121517
return false;
@@ -1551,29 +1556,56 @@ void AllocOptimize::promoteDestroyAddr(
15511556
dai->eraseFromParent();
15521557
}
15531558

1559+
void AllocOptimize::promoteLoadTake(
1560+
LoadInst *li, MutableArrayRef<AvailableValue> availableValues) {
1561+
assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
1562+
"load [copy], load [trivial], load should be handled by "
1563+
"promoteLoadCopy");
1564+
SILValue address = li->getOperand();
1565+
SILType loadTy = address->getType().getObjectType();
1566+
1567+
// Compute the access path down to the field so we can determine precise
1568+
// def/use behavior.
1569+
unsigned firstElt = computeSubelement(address, TheMemory);
1570+
1571+
// Aggregate together all of the subelements into something that has the same
1572+
// type as the load did, and emit smaller loads for any subelements that were
1573+
// not available.
1574+
AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
1575+
true /*isTake*/);
1576+
SILValue newVal = agg.aggregateValues(loadTy, address, firstElt);
1577+
1578+
++NumLoadTakePromoted;
1579+
1580+
LLVM_DEBUG(llvm::dbgs() << " *** Promoting load_take: " << *li << "\n");
1581+
LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal << "\n");
1582+
1583+
// Then perform the RAUW.
1584+
li->replaceAllUsesWith(newVal);
1585+
li->eraseFromParent();
1586+
}
1587+
15541588
namespace {
15551589

1556-
struct DestroyAddrPromotionState {
1557-
ArrayRef<SILInstruction *> destroys;
1558-
SmallVector<unsigned, 8> destroyAddrIndices;
1590+
struct TakePromotionState {
1591+
ArrayRef<SILInstruction *> takeInsts;
1592+
SmallVector<unsigned, 8> takeInstIndices;
15591593
SmallVector<AvailableValue, 32> availableValueList;
15601594
SmallVector<unsigned, 8> availableValueStartOffsets;
15611595

1562-
DestroyAddrPromotionState(ArrayRef<SILInstruction *> destroys)
1563-
: destroys(destroys) {}
1596+
TakePromotionState(ArrayRef<SILInstruction *> takeInsts)
1597+
: takeInsts(takeInsts) {}
15641598

1565-
unsigned size() const {
1566-
return destroyAddrIndices.size();
1567-
}
1599+
unsigned size() const { return takeInstIndices.size(); }
15681600

1569-
void initializeForDestroyAddr(unsigned destroyAddrIndex) {
1601+
void initializeForTakeInst(unsigned takeInstIndex) {
15701602
availableValueStartOffsets.push_back(availableValueList.size());
1571-
destroyAddrIndices.push_back(destroyAddrIndex);
1603+
takeInstIndices.push_back(takeInstIndex);
15721604
}
15731605

1574-
std::pair<DestroyAddrInst *, MutableArrayRef<AvailableValue>>
1606+
std::pair<SILInstruction *, MutableArrayRef<AvailableValue>>
15751607
getData(unsigned index) {
1576-
unsigned destroyAddrIndex = destroyAddrIndices[index];
1608+
unsigned takeInstIndex = takeInstIndices[index];
15771609
unsigned startOffset = availableValueStartOffsets[index];
15781610
unsigned count;
15791611

@@ -1585,36 +1617,21 @@ struct DestroyAddrPromotionState {
15851617

15861618
MutableArrayRef<AvailableValue> values(&availableValueList[startOffset],
15871619
count);
1588-
auto *dai = cast<DestroyAddrInst>(destroys[destroyAddrIndex]);
1589-
return {dai, values};
1620+
return {takeInsts[takeInstIndex], values};
15901621
}
15911622
};
15921623

15931624
} // end anonymous namespace
15941625

1595-
/// If the allocation is an autogenerated allocation that is only stored to
1596-
/// (after load promotion) then remove it completely.
1597-
bool AllocOptimize::tryToRemoveDeadAllocation() {
1598-
assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
1599-
"Unhandled allocation case");
1600-
1601-
auto *f = TheMemory->getFunction();
1602-
1603-
// We don't want to remove allocations that are required for useful debug
1604-
// information at -O0. As such, we only remove allocations if:
1605-
//
1606-
// 1. They are in a transparent function.
1607-
// 2. They are in a normal function, but didn't come from a VarDecl, or came
1608-
// from one that was autogenerated or inlined from a transparent function.
1609-
SILLocation loc = TheMemory->getLoc();
1610-
if (!f->isTransparent() &&
1611-
loc.getAsASTNode<VarDecl>() && !loc.isAutoGenerated() &&
1612-
!loc.is<MandatoryInlinedLocation>())
1613-
return false;
1614-
1615-
// Check the uses list to see if there are any non-store uses left over after
1616-
// load promotion and other things PMO does.
1617-
for (auto &u : Uses) {
1626+
// Check if our use list has any non store, non take uses that keep the value
1627+
// alive. Returns nullptr on success and the user that prevents removal on
1628+
// failure.
1629+
//
1630+
// NOTE: This also gathers up any takes that we need to process.
1631+
static SILInstruction *
1632+
checkForNonStoreNonTakeUses(ArrayRef<PMOMemoryUse> uses,
1633+
SmallVectorImpl<SILInstruction *> &loadTakeList) {
1634+
for (auto &u : uses) {
16181635
// Ignore removed instructions.
16191636
if (u.Inst == nullptr)
16201637
continue;
@@ -1623,33 +1640,73 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
16231640
case PMOUseKind::Assign:
16241641
// Until we can promote the value being destroyed by the assign, we can
16251642
// not remove deallocations with such assigns.
1626-
return false;
1643+
return u.Inst;
16271644
case PMOUseKind::InitOrAssign:
1628-
break; // These don't prevent removal.
1645+
continue; // These don't prevent removal.
1646+
case PMOUseKind::Load:
1647+
// For now only handle takes from alloc_stack.
1648+
//
1649+
// TODO: It should be implementable, but it has not been needed yet.
1650+
if (auto *li = dyn_cast<LoadInst>(u.Inst)) {
1651+
if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
1652+
loadTakeList.push_back(li);
1653+
continue;
1654+
}
1655+
}
1656+
return u.Inst;
16291657
case PMOUseKind::Initialization:
16301658
if (!isa<ApplyInst>(u.Inst) &&
16311659
// A copy_addr that is not a take affects the retain count
16321660
// of the source.
16331661
(!isa<CopyAddrInst>(u.Inst) ||
16341662
cast<CopyAddrInst>(u.Inst)->isTakeOfSrc()))
1635-
break;
1663+
continue;
16361664
// FALL THROUGH.
1637-
LLVM_FALLTHROUGH;
1638-
case PMOUseKind::Load:
1665+
LLVM_FALLTHROUGH;
16391666
case PMOUseKind::IndirectIn:
16401667
case PMOUseKind::InOutUse:
16411668
case PMOUseKind::Escape:
1642-
LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
1643-
"kept alive by: "
1644-
<< *u.Inst);
1645-
return false; // These do prevent removal.
1669+
return u.Inst; // These do prevent removal.
16461670
}
16471671
}
16481672

1673+
return nullptr;
1674+
}
1675+
1676+
// We don't want to remove allocations that are required for useful debug
1677+
// information at -O0. As such, we only remove allocations if:
1678+
//
1679+
// 1. They are in a transparent function.
1680+
// 2. They are in a normal function, but didn't come from a VarDecl, or came
1681+
// from one that was autogenerated or inlined from a transparent function.
1682+
static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
1683+
SILLocation loc = TheMemory->getLoc();
1684+
return TheMemory->getFunction()->isTransparent() ||
1685+
!loc.getAsASTNode<VarDecl>() || loc.isAutoGenerated() ||
1686+
loc.is<MandatoryInlinedLocation>();
1687+
}
1688+
1689+
bool AllocOptimize::tryToRemoveDeadAllocation() {
1690+
assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
1691+
"Unhandled allocation case");
1692+
1693+
if (!isRemovableAutogeneratedAllocation(TheMemory))
1694+
return false;
1695+
1696+
SmallVector<SILInstruction *, 8> loadTakeList;
1697+
// Check the uses list to see if there are any non-store uses left over after
1698+
// load promotion and other things PMO does.
1699+
if (auto *badUser = checkForNonStoreNonTakeUses(Uses, loadTakeList)) {
1700+
LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
1701+
"kept alive by: "
1702+
<< *badUser);
1703+
return false;
1704+
}
1705+
16491706
// If our memory is trivially typed, we can just remove it without needing to
16501707
// consider if the stored value needs to be destroyed. So at this point,
16511708
// delete the memory!
1652-
if (MemoryType.isTrivial(*f)) {
1709+
if (MemoryType.isTrivial(*TheMemory->getFunction())) {
16531710
LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated trivial allocation: "
16541711
<< *TheMemory);
16551712

@@ -1661,23 +1718,31 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
16611718
return true;
16621719
}
16631720

1721+
// Now make sure we can promote all load [take] and prepare state for each of
1722+
// them.
1723+
TakePromotionState loadTakeState(loadTakeList);
1724+
for (auto p : llvm::enumerate(loadTakeList)) {
1725+
loadTakeState.initializeForTakeInst(p.index());
1726+
if (!canPromoteTake(p.value(), loadTakeState.availableValueList))
1727+
return false;
1728+
}
1729+
16641730
// Otherwise removing the deallocation will drop any releases. Check that
16651731
// there is nothing preventing removal.
1666-
DestroyAddrPromotionState state(Releases);
1667-
1732+
TakePromotionState destroyAddrState(Releases);
16681733
for (auto p : llvm::enumerate(Releases)) {
16691734
auto *r = p.value();
16701735
if (r == nullptr)
16711736
continue;
16721737

16731738
// We stash all of the destroy_addr that we see.
16741739
if (auto *dai = dyn_cast<DestroyAddrInst>(r)) {
1675-
state.initializeForDestroyAddr(p.index() /*destroyAddrIndex*/);
1740+
destroyAddrState.initializeForTakeInst(p.index() /*destroyAddrIndex*/);
16761741
// Make sure we can actually promote this destroy addr. If we can not,
16771742
// then we must bail. In order to not gather available values twice, we
16781743
// gather the available values here that we will use to promote the
16791744
// values.
1680-
if (!canPromoteDestroyAddr(dai, state.availableValueList))
1745+
if (!canPromoteTake(dai, destroyAddrState.availableValueList))
16811746
return false;
16821747
continue;
16831748
}
@@ -1689,14 +1754,22 @@ bool AllocOptimize::tryToRemoveDeadAllocation() {
16891754
return false;
16901755
}
16911756

1692-
// If we reached this point, we can promote all of our destroy_addr.
1693-
for (unsigned i : range(state.size())) {
1694-
DestroyAddrInst *dai;
1757+
// If we reached this point, we can promote all of our destroy_addr and load
1758+
// take. Since our load [take] may be available values for our destroy_addr,
1759+
// we promote the destroy_addr first.
1760+
for (unsigned i : range(destroyAddrState.size())) {
1761+
SILInstruction *dai;
16951762
MutableArrayRef<AvailableValue> values;
1696-
std::tie(dai, values) = state.getData(i);
1697-
promoteDestroyAddr(dai, values);
1763+
std::tie(dai, values) = destroyAddrState.getData(i);
1764+
promoteDestroyAddr(cast<DestroyAddrInst>(dai), values);
16981765
// We do not need to unset releases, since we are going to exit here.
16991766
}
1767+
for (unsigned i : range(loadTakeState.size())) {
1768+
SILInstruction *li;
1769+
MutableArrayRef<AvailableValue> values;
1770+
std::tie(li, values) = loadTakeState.getData(i);
1771+
promoteLoadTake(cast<LoadInst>(li), values);
1772+
}
17001773

17011774
LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
17021775
<< *TheMemory);
@@ -1719,7 +1792,7 @@ bool AllocOptimize::optimizeMemoryAccesses() {
17191792
auto &use = Uses[i];
17201793
// Ignore entries for instructions that got expanded along the way.
17211794
if (use.Inst && use.Kind == PMOUseKind::Load) {
1722-
if (promoteLoad(use.Inst)) {
1795+
if (promoteLoadCopy(use.Inst)) {
17231796
Uses[i].Inst = nullptr; // remove entry if load got deleted.
17241797
changed = true;
17251798
}

0 commit comments

Comments
 (0)