
Commit a6199cf

Merge pull request #41841 from nate-chandler/lexical_lifetimes/ssa-destroy-hoisting/fold-sequences
[SSADestroyHoisting] Fold into sequences.
2 parents fb6503f + 87d0c9f commit a6199cf

File tree: 2 files changed (+520 −34 lines)

lib/SILOptimizer/Transforms/SSADestroyHoisting.cpp

Lines changed: 249 additions & 23 deletions
@@ -89,6 +89,7 @@
 
 #define DEBUG_TYPE "ssa-destroy-hoisting"
 
+#include "swift/AST/Type.h"
 #include "swift/Basic/GraphNodeWorklist.h"
 #include "swift/Basic/SmallPtrSetVector.h"
 #include "swift/SIL/BasicBlockDatastructures.h"
@@ -496,6 +497,9 @@ bool DeinitBarriers::DestroyReachability::checkReachablePhiBarrier(
 /// object.
 class HoistDestroys {
   SILValue storageRoot;
+  SILFunction *function;
+  SILModule &module;
+  TypeExpansionContext typeExpansionContext;
   bool ignoreDeinitBarriers;
   SmallPtrSetImpl<SILInstruction *> &remainingDestroyAddrs;
   InstructionDeleter &deleter;
@@ -509,7 +513,9 @@ class HoistDestroys {
   HoistDestroys(SILValue storageRoot, bool ignoreDeinitBarriers,
                 SmallPtrSetImpl<SILInstruction *> &remainingDestroyAddrs,
                 InstructionDeleter &deleter)
-      : storageRoot(storageRoot), ignoreDeinitBarriers(ignoreDeinitBarriers),
+      : storageRoot(storageRoot), function(storageRoot->getFunction()),
+        module(function->getModule()), typeExpansionContext(*function),
+        ignoreDeinitBarriers(ignoreDeinitBarriers),
         remainingDestroyAddrs(remainingDestroyAddrs), deleter(deleter),
         destroyMergeBlocks(getFunction()) {}
 
@@ -518,11 +524,20 @@ class HoistDestroys {
 protected:
   SILFunction *getFunction() const { return storageRoot->getFunction(); }
 
-  bool foldBarrier(SILInstruction *barrier, SILValue accessScope);
+  bool foldBarrier(SILInstruction *barrier, const AccessStorage &storage,
+                   const DeinitBarriers &deinitBarriers);
 
-  bool foldBarrier(SILInstruction *barrier, const KnownStorageUses &knownUses,
+  bool foldBarrier(SILInstruction *barrier, const AccessStorage &storage,
+                   const KnownStorageUses &knownUses,
                    const DeinitBarriers &deinitBarriers);
 
+  bool checkFoldingBarrier(SILInstruction *instruction,
+                           SmallVectorImpl<LoadInst *> &loads,
+                           SmallVectorImpl<CopyAddrInst *> &copies,
+                           SmallPtrSetImpl<AccessPath::PathNode> &leaves,
+                           const AccessStorage &storage,
+                           const DeinitBarriers &deinitBarriers);
+
   void insertDestroy(SILInstruction *barrier, SILInstruction *insertBefore,
                      const KnownStorageUses &knownUses);
 
@@ -531,7 +546,8 @@
 
   void createSuccessorDestroys(SILBasicBlock *barrierBlock);
 
-  bool rewriteDestroys(const KnownStorageUses &knownUses,
+  bool rewriteDestroys(const AccessStorage &storage,
+                       const KnownStorageUses &knownUses,
                        const DeinitBarriers &deinitBarriers);
 
   void mergeDestroys(SILBasicBlock *mergeBlock);
@@ -553,16 +569,17 @@ bool HoistDestroys::perform() {
   deinitBarriers.compute();
 
   // No SIL changes happen before rewriting.
-  return rewriteDestroys(knownUses, deinitBarriers);
+  return rewriteDestroys(storage, knownUses, deinitBarriers);
 }
 
-bool HoistDestroys::rewriteDestroys(const KnownStorageUses &knownUses,
+bool HoistDestroys::rewriteDestroys(const AccessStorage &storage,
+                                    const KnownStorageUses &knownUses,
                                     const DeinitBarriers &deinitBarriers) {
   // Place a new destroy after each barrier instruction.
   for (SILInstruction *barrier : deinitBarriers.barriers) {
     auto *barrierBlock = barrier->getParent();
     if (barrier != barrierBlock->getTerminator()) {
-      if (!foldBarrier(barrier, knownUses, deinitBarriers))
+      if (!foldBarrier(barrier, storage, knownUses, deinitBarriers))
         insertDestroy(barrier, barrier->getNextInstruction(), knownUses);
       continue;
     }
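For illustration, a minimal SIL sketch (a sketch, not from the commit) of what the rewrite loop does when a barrier cannot be folded; the names %addr, %f, and the type $Klass are hypothetical, and the apply stands in for any deinit barrier:

    // Before hoisting: the destroy sits well below the last barrier.
    %v = load [copy] %addr : $*Klass
    %r = apply %f() : $@convention(thin) () -> ()   // deinit barrier
    ...                                             // code unrelated to %addr
    destroy_addr %addr : $*Klass

    // After hoisting: a new destroy is placed immediately after the
    // barrier and the original one is deleted.
    %v = load [copy] %addr : $*Klass
    %r = apply %f() : $@convention(thin) () -> ()   // deinit barrier
    destroy_addr %addr : $*Klass
    ...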
@@ -610,30 +627,234 @@ bool HoistDestroys::rewriteDestroys(const KnownStorageUses &knownUses,
   return deleter.hadCallbackInvocation();
 }
 
-bool HoistDestroys::foldBarrier(SILInstruction *barrier, SILValue storageRoot) {
-  if (auto *load = dyn_cast<LoadInst>(barrier)) {
-    if (stripAccessMarkers(load->getOperand()) ==
-        stripAccessMarkers(storageRoot)) {
-      if (load->getOwnershipQualifier() == LoadOwnershipQualifier::Copy) {
-        load->setOwnershipQualifier(LoadOwnershipQualifier::Take);
+/// Try to fold the destroy_addr with the specified barrier, or a backwards
+/// sequence of instructions that it begins.
+///
+/// Do the following kinds of folds:
+///
+/// - loads:
+///     given: load [copy] %addr
+///            destroy_addr %addr
+///     yield: load [take]
+/// - copy_addrs:
+///     given: copy_addr %addr to ...
+///            destroy_addr %addr
+///     yield: copy_addr [take] %addr
+///
+/// Additionally, generalize this to subobjects: fold when there is a sequence
+/// of copy_addrs and loads that covers all the subobjects of %addr. Given
+/// projections %subobject_1 and %subobject_2 out of %addr which fully cover
+/// all the non-trivial fields of the recursive type-tree of %addr, fold
+///
+///     load [copy] %subobject_1
+///     copy_addr %subobject_2 to ...
+///     destroy_addr %addr
+///
+/// into
+///
+///     load [take] %subobject_1
+///     copy_addr [take] %subobject_2 to ...
+///
+/// so long as all the loads and copy_addrs occur within the same block.
+bool HoistDestroys::foldBarrier(SILInstruction *barrier,
+                                const AccessStorage &storage,
+                                const DeinitBarriers &deinitBarriers) {
+
+  // The load [copy]s which will be folded into load [take]s if folding is
+  // possible.
+  llvm::SmallVector<LoadInst *, 4> loads;
+  // The copy_addrs which will be folded into copy_addr [take]s if folding is
+  // possible.
+  llvm::SmallVector<CopyAddrInst *, 4> copies;
+
+  // The non-trivial storage leaves of the root storage, all of which must be
+  // destroyed exactly once in the sequence of instructions prior to the
+  // destroy_addr in order for folding to occur.
+  llvm::SmallPtrSet<AccessPath::PathNode, 16> leaves;
+
+  visitProductLeafAccessPathNodes(storageRoot, typeExpansionContext, module,
+                                  [&](AccessPath::PathNode node, SILType ty) {
+                                    if (ty.isTrivial(*function))
+                                      return;
+                                    leaves.insert(node);
+                                  });
+
+  for (auto *instruction = barrier; instruction != nullptr;
+       instruction = instruction->getPreviousInstruction()) {
+    if (checkFoldingBarrier(instruction, loads, copies, leaves, storage,
+                            deinitBarriers))
+      return false;
+
+    // If we have load [copy]s or copy_addrs of projections out of the root
+    // storage that cover all non-trivial product leaves, then we can fold!
+    //
+    // Stop looking for instructions to fold.
+    if (leaves.empty())
+      break;
+  }
+
+  if (!leaves.empty())
+    return false;
+
+  for (auto *load : loads) {
+    assert(load->getOwnershipQualifier() == LoadOwnershipQualifier::Copy);
+    load->setOwnershipQualifier(LoadOwnershipQualifier::Take);
+  }
+  for (auto *copy : copies) {
+    assert(!copy->isTakeOfSrc());
+    copy->setIsTakeOfSrc(IsTake);
+  }
+
+  return true;
+}
+
+/// Whether the specified instruction is a barrier to folding.
+///
+/// TODO: This is a bit more conservative than it needs to be in a couple of
+/// ways:
+///
+/// (1) even if we've already seen a leaf, in certain cases we should be able
+///     to fold anyway. For example, given projections %p1 and %p2 of some
+///     root storage %a, in the following scenario:
+///
+///       %p1 = <PROJECT> %a
+///       %p2 = <PROJECT> %a
+///       %v1 = load [copy] %p1
+///       %v2_1 = load [copy] %p2
+///       %v2_2 = load [copy] %p2
+///       destroy_addr %a
+///
+///     we could fold destroy_addr %a into the second load [copy] %p2 and the
+///     load [copy] %p1:
+///
+///       %v1 = load [take] %p1
+///       %v2_1 = load [copy] %p2
+///       %v2_2 = load [take] %p2
+///
+///     And indeed we can do that for loads from a subprojection %p2_sub of
+///     %p2; the following
+///
+///       %v1 = load [copy] %p1
+///       %v2_sub = load [copy] %p2_sub
+///       %v2 = load [copy] %p2
+///       destroy_addr %a
+///
+///     could be folded to
+///
+///       %v1 = load [take] %p1
+///       %v2_sub = load [copy] %p2_sub
+///       %v2 = load [take] %p2
+///
+/// (2) We should be able to continue folding over a load [trivial] so long as
+///     the instructions that we're folding with don't destroy an aggregate
+///     that contains the projection which is the target of the load
+///     [trivial]. For example, given
+///
+///       %addr = alloc_stack $(X, I)
+///       %x_addr = tuple_element_addr %addr : $*(X, I), 0
+///       %i_addr = tuple_element_addr %addr : $*(X, I), 1
+///       %x = load [copy] %x_addr : $*X
+///       %i = load [trivial] %i_addr : $*I
+///       destroy_addr %addr
+///
+///     we should be able to fold the destroy_addr of the tuple with the
+///     load [copy] and ignore the load [trivial].
+///
+///     Doing this is complicated by the fact that we can't ignore the load
+///     [trivial] if the load [copy] is of the whole tuple. If we have instead
+///
+///       %addr = alloc_stack $(X, I)
+///       %x_addr = tuple_element_addr %addr : $*(X, I), 0
+///       %i_addr = tuple_element_addr %addr : $*(X, I), 1
+///       %x = load [copy] %addr : $*(X, I)
+///       %i = load [trivial] %i_addr : $*I
+///       destroy_addr %addr
+///
+///     then we cannot fold. If we did, we would end up with invalid SIL:
+///
+///       %x = load [take] %addr
+///       %i = load [trivial] %i_addr
+bool HoistDestroys::checkFoldingBarrier(
+    SILInstruction *instruction, SmallVectorImpl<LoadInst *> &loads,
+    SmallVectorImpl<CopyAddrInst *> &copies,
+    SmallPtrSetImpl<AccessPath::PathNode> &leaves, const AccessStorage &storage,
+    const DeinitBarriers &deinitBarriers) {
+  // The address of a projection out of the root storage which would be
+  // folded if folding is possible.
+  //
+  // If no such address is found, we need to check whether the instruction
+  // is a barrier.
+  SILValue address;
+  if (auto *load = dyn_cast<LoadInst>(instruction)) {
+    auto loadee = load->getOperand();
+    auto relativeAccessStorage = RelativeAccessStorageWithBase::compute(loadee);
+    if (relativeAccessStorage.getStorage().hasIdenticalStorage(storage)) {
+      // If the access path from the loaded address to its root storage involves
+      // a (layout non-equivalent) typecast--a load [take] of the casted address
+      // would not be equivalent to a load [copy] followed by a destroy_addr of
+      // the corresponding uncast projection--the truncated portion might have
+      // refcounted components.
+      if (relativeAccessStorage.cast == AccessStorageCast::Type)
         return true;
+      if (load->getOwnershipQualifier() == LoadOwnershipQualifier::Copy) {
+        address = loadee;
+        loads.push_back(load);
       } else {
-        assert(load->getOperand()->getType().isTrivial(*load->getFunction()));
-        return false;
+        assert(loadee->getType().isTrivial(*load->getFunction()));
+        return true;
       }
     }
+  } else if (auto *copy = dyn_cast<CopyAddrInst>(instruction)) {
+    auto source = copy->getSrc();
+    auto relativeAccessStorage = RelativeAccessStorageWithBase::compute(source);
+    if (relativeAccessStorage.getStorage().hasIdenticalStorage(storage)) {
+      // If the access path from the copy_addr'd address to its root storage
+      // involves a (layout non-equivalent) typecast--a copy_addr [take] of the
+      // casted address would not be equivalent to a copy_addr followed by a
+      // destroy_addr of the corresponding uncast projection--the truncated
+      // portion might have refcounted components.
+      if (relativeAccessStorage.cast == AccessStorageCast::Type)
+        return true;
+      address = source;
+      copies.push_back(copy);
+    }
   }
-  if (auto *copy = dyn_cast<CopyAddrInst>(barrier)) {
-    if (stripAccessMarkers(copy->getSrc()) == stripAccessMarkers(storageRoot)) {
-      assert(!copy->isTakeOfSrc());
-      copy->setIsTakeOfSrc(IsTake);
+  if (address) {
+    // We found a relevant instruction that is operating on a projection out
+    // of the root storage which would be folded if folding were possible.
+    // Find its nontrivial product leaves and remove them from the set of
+    // leaves of the root storage which we're waiting to see.
+    bool alreadySawLeaf = false;
+    visitProductLeafAccessPathNodes(address, typeExpansionContext, module,
+                                    [&](AccessPath::PathNode node, SILType ty) {
+                                      if (ty.isTrivial(*function))
+                                        return;
+                                      bool erased = leaves.erase(node);
+                                      alreadySawLeaf =
+                                          alreadySawLeaf || !erased;
+                                    });
+    if (alreadySawLeaf) {
+      // We saw this non-trivial product leaf already. That means there are
+      // multiple load [copy]s or copy_addrs of at least one product leaf
+      // before (walking backwards from the hoisting point) there are
+      // instructions that load or copy from all the non-trivial leaves.
+      // Give up on folding.
       return true;
     }
+  } else if (deinitBarriers.isBarrier(instruction)) {
+    // We didn't find an instruction that was both
+    // - relevant (i.e. a copy_addr or a load [copy])
+    // - operating on a projection of the root storage
+    // Additionally:
+    // - we can't ignore whether it's a barrier
+    // - and it IS a barrier.
+    // We can't fold.
+    return true;
   }
   return false;
 }
 
 bool HoistDestroys::foldBarrier(SILInstruction *barrier,
+                                const AccessStorage &storage,
                                 const KnownStorageUses &knownUses,
                                 const DeinitBarriers &deinitBarriers) {
   if (auto *eai = dyn_cast<EndAccessInst>(barrier)) {
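To make the sequence fold concrete, here is a hedged SIL sketch of the subobject case the doc comment above describes; the struct $S, its non-trivial fields f1 and f2, and all value names are hypothetical:

    // Walking backwards from the destroy_addr, the copy_addr and the load
    // together consume every non-trivial leaf of %addr, so the destroy folds:
    %f1 = struct_element_addr %addr : $*S, #S.f1
    %f2 = struct_element_addr %addr : $*S, #S.f2
    %v = load [copy] %f1 : $*Klass
    copy_addr %f2 to [initialization] %dst : $*Klass
    destroy_addr %addr : $*S

    // becomes

    %v = load [take] %f1 : $*Klass
    copy_addr [take] %f2 to [initialization] %dst : $*Klass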
@@ -645,13 +866,13 @@ bool HoistDestroys::foldBarrier(SILInstruction *barrier,
     while ((instruction = instruction->getPreviousInstruction())) {
       if (instruction == bai)
         return false;
-      if (foldBarrier(instruction, storageRoot))
+      if (foldBarrier(instruction, storage, deinitBarriers))
         return true;
       if (deinitBarriers.isBarrier(instruction))
         return false;
     }
   }
-  return foldBarrier(barrier, storageRoot);
+  return foldBarrier(barrier, storage, deinitBarriers);
 }
 
 // \p barrier may be null if the destroy is at function entry.
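A hedged SIL sketch of the access-scope case this overload handles (names hypothetical): when the barrier is an end_access, the walk continues backwards inside the scope and folds there if possible:

    %a = begin_access [modify] [static] %addr : $*Klass
    %v = load [copy] %a : $*Klass
    end_access %a : $*Klass                          // the hoisting barrier
    destroy_addr %addr : $*Klass

    // can fold to

    %a = begin_access [modify] [static] %addr : $*Klass
    %v = load [take] %a : $*Klass
    end_access %a : $*Klass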
@@ -729,6 +950,12 @@ bool hoistDestroys(SILValue root, bool ignoreDeinitBarriers,
   // The algorithm assumes no critical edges.
   assert(function->hasOwnership() && "requires OSSA");
 
+  // If lexical lifetimes aren't enabled, then deinit barriers aren't respected.
+  auto &module = function->getModule();
+  auto enableLexicalLifetimes =
+      module.getASTContext().SILOpts.supportsLexicalLifetimes(module);
+  ignoreDeinitBarriers = ignoreDeinitBarriers || !enableLexicalLifetimes;
+
   return HoistDestroys(root, ignoreDeinitBarriers, remainingDestroyAddrs,
                        deleter)
       .perform();
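A hedged sketch of why the guard matters (the callee %f and what it observes are hypothetical): deinit barriers only carry meaning when lexical lifetimes are enabled.

    // With lexical lifetimes, the apply below is a deinit barrier: hoisting
    // destroy_addr %addr above it could run a class deinit early and change
    // what the callee observes (e.g. through a weak reference).
    %r = apply %f() : $@convention(thin) () -> ()
    destroy_addr %addr : $*Klass

    // With lexical lifetimes disabled, ignoreDeinitBarriers is forced to
    // true, and the destroy may hoist past the apply.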
@@ -842,8 +1069,7 @@ void SSADestroyHoisting::run() {
   // We assume that the function is in reverse post order so visiting the
   // blocks and pushing begin_access as we see them and then popping them off
   // the end will result in hoisting inner begin_access' destroy_addrs first.
-  while (!bais.empty()) {
-    auto *bai = bais.pop_back_val();
+  for (auto *bai : llvm::reverse(bais)) {
     changed |= hoistDestroys(bai, /*ignoreDeinitBarriers=*/true,
                              remainingDestroyAddrs, deleter);
   }
