Skip to content

Commit 62cfed6

Browse files
authored
Merge pull request #81189 from eeckstein/fix-licm-6.2
LICM: handle memory dependency for store sinking correctly
2 parents 1f25173 + 2041712 commit 62cfed6

File tree

2 files changed

+69
-23
lines changed

2 files changed

+69
-23
lines changed

lib/SILOptimizer/LoopTransforms/LICM.cpp

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -714,10 +714,13 @@ bool LoopTreeOptimization::isSafeReadOnlyApply(BasicCalleeAnalysis *BCA, ApplyIn
714714

715715
static void checkSideEffects(swift::SILInstruction &Inst,
716716
InstSet &SideEffectInsts,
717-
SmallVectorImpl<SILInstruction *> &sideEffectsInBlock) {
717+
SmallVectorImpl<SILInstruction *> &sideEffectsInBlock,
718+
bool &hasOtherMemReadingInsts) {
718719
if (Inst.mayHaveSideEffects()) {
719720
SideEffectInsts.insert(&Inst);
720721
sideEffectsInBlock.push_back(&Inst);
722+
} else if (Inst.mayReadFromMemory()) {
723+
hasOtherMemReadingInsts = true;
721724
}
722725
}
723726

@@ -885,11 +888,15 @@ void LoopTreeOptimization::analyzeCurrentLoop(
885888
SmallVector<BeginAccessInst *, 8> BeginAccesses;
886889
SmallVector<FullApplySite, 8> fullApplies;
887890

891+
// True if the loop has instructions which (may) read from memory, which are not
892+
// in `Loads` and not in `sideEffects`.
893+
bool hasOtherMemReadingInsts = false;
894+
888895
for (auto *BB : Loop->getBlocks()) {
889896
SmallVector<SILInstruction *, 8> sideEffectsInBlock;
890897
for (auto &Inst : *BB) {
891898
if (hasOwnershipOperandsOrResults(&Inst)) {
892-
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
899+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
893900
// Collect fullApplies to be checked in analyzeBeginAccess
894901
if (auto fullApply = FullApplySite::isa(&Inst)) {
895902
fullApplies.push_back(fullApply);
@@ -921,12 +928,12 @@ void LoopTreeOptimization::analyzeCurrentLoop(
921928
}
922929
Stores.push_back(store);
923930
LoadsAndStores.push_back(&Inst);
924-
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
931+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
925932
break;
926933
}
927934
case SILInstructionKind::BeginAccessInst:
928935
BeginAccesses.push_back(cast<BeginAccessInst>(&Inst));
929-
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
936+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
930937
break;
931938
case SILInstructionKind::RefElementAddrInst:
932939
SpecialHoist.push_back(cast<RefElementAddrInst>(&Inst));
@@ -937,7 +944,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
937944
// cond_fail that would have protected (executed before) a memory access
938945
// must - after hoisting - also be executed before said access.
939946
HoistUp.insert(&Inst);
940-
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
947+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
941948
break;
942949
case SILInstructionKind::ApplyInst: {
943950
auto *AI = cast<ApplyInst>(&Inst);
@@ -971,7 +978,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
971978
}
972979
}
973980

974-
checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
981+
checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
975982
if (canHoistUpDefault(&Inst, Loop, DomTree, RunsOnHighLevelSIL)) {
976983
HoistUp.insert(&Inst);
977984
}
@@ -1013,23 +1020,25 @@ void LoopTreeOptimization::analyzeCurrentLoop(
10131020
}
10141021
}
10151022

1016-
// Collect memory locations for which we can move all loads and stores out
1017-
// of the loop.
1018-
//
1019-
// Note: The Loads set and LoadsAndStores set may mutate during this loop.
1020-
for (StoreInst *SI : Stores) {
1021-
// Use AccessPathWithBase to recover a base address that can be used for
1022-
// newly inserted memory operations. If we instead teach hoistLoadsAndStores
1023-
// how to rematerialize global_addr, then we don't need this base.
1024-
auto access = AccessPathWithBase::compute(SI->getDest());
1025-
auto accessPath = access.accessPath;
1026-
if (accessPath.isValid() &&
1027-
(access.base && isLoopInvariant(access.base, Loop))) {
1028-
if (isOnlyLoadedAndStored(AA, sideEffects, Loads, Stores, SI->getDest(),
1029-
accessPath)) {
1030-
if (!LoadAndStoreAddrs.count(accessPath)) {
1031-
if (splitLoads(Loads, accessPath, SI->getDest())) {
1032-
LoadAndStoreAddrs.insert(accessPath);
1023+
if (!hasOtherMemReadingInsts) {
1024+
// Collect memory locations for which we can move all loads and stores out
1025+
// of the loop.
1026+
//
1027+
// Note: The Loads set and LoadsAndStores set may mutate during this loop.
1028+
for (StoreInst *SI : Stores) {
1029+
// Use AccessPathWithBase to recover a base address that can be used for
1030+
// newly inserted memory operations. If we instead teach hoistLoadsAndStores
1031+
// how to rematerialize global_addr, then we don't need this base.
1032+
auto access = AccessPathWithBase::compute(SI->getDest());
1033+
auto accessPath = access.accessPath;
1034+
if (accessPath.isValid() &&
1035+
(access.base && isLoopInvariant(access.base, Loop))) {
1036+
if (isOnlyLoadedAndStored(AA, sideEffects, Loads, Stores, SI->getDest(),
1037+
accessPath)) {
1038+
if (!LoadAndStoreAddrs.count(accessPath)) {
1039+
if (splitLoads(Loads, accessPath, SI->getDest())) {
1040+
LoadAndStoreAddrs.insert(accessPath);
1041+
}
10331042
}
10341043
}
10351044
}

test/SILOptimizer/licm.sil

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ struct NonCopyable : ~Copyable {
1818
var x: Int
1919
}
2020

21+
struct S {
22+
var i: Int
23+
var s: String
24+
}
25+
2126
// globalArray
2227
sil_global @globalArray : $Storage
2328

@@ -1667,3 +1672,35 @@ bb3:
16671672
return %r : $()
16681673
}
16691674

1675+
// CHECK-LABEL: sil [ossa] @store_and_load_borrow :
1676+
// CHECK: bb1({{.*}}):
1677+
// CHECK: store %1 to [trivial]
1678+
// CHECK: load_borrow
1679+
// CHECK: bb2:
1680+
// CHECK-LABEL: } // end sil function 'store_and_load_borrow'
1681+
sil [ossa] @store_and_load_borrow : $@convention(thin) (@inout S, Int) -> () {
1682+
bb0(%0 : $*S, %1 : $Int):
1683+
%2 = load_borrow %0
1684+
%3 = struct_element_addr %0, #S.i
1685+
br bb1(%2)
1686+
1687+
bb1(%4 : @reborrow $S):
1688+
%5 = borrowed %4 from ()
1689+
end_borrow %5
1690+
store %1 to [trivial] %3
1691+
%10 = load_borrow %0
1692+
cond_br undef, bb2, bb3
1693+
1694+
bb2:
1695+
br bb1(%10)
1696+
1697+
bb3:
1698+
br bb4(%10)
1699+
1700+
bb4(%14 : @reborrow $S):
1701+
%15 = borrowed %14 from ()
1702+
end_borrow %15
1703+
%r = tuple()
1704+
return %r
1705+
}
1706+

0 commit comments

Comments
 (0)