@@ -714,10 +714,13 @@ bool LoopTreeOptimization::isSafeReadOnlyApply(BasicCalleeAnalysis *BCA, ApplyIn
714
714
715
715
/// Classify \p Inst by its memory behavior and record the result.
///
/// If `Inst.mayHaveSideEffects()` is true, a pointer to the instruction is
/// inserted into \p SideEffectInsts and appended to \p sideEffectsInBlock.
/// Otherwise, if `Inst.mayReadFromMemory()` is true, \p hasOtherMemReadingInsts
/// is set to true. The flag is only ever set here, never cleared, so a value
/// of true passed in by the caller is preserved across calls.
/// Instructions for which neither query is true are left unrecorded.
static void checkSideEffects (swift::SILInstruction &Inst,
716
716
InstSet &SideEffectInsts,
717
- SmallVectorImpl<SILInstruction *> &sideEffectsInBlock) {
717
+ SmallVectorImpl<SILInstruction *> &sideEffectsInBlock,
718
+ bool &hasOtherMemReadingInsts) {
718
719
if (Inst.mayHaveSideEffects ()) {
719
720
SideEffectInsts.insert (&Inst);
720
721
sideEffectsInBlock.push_back (&Inst);
722
+ } else if (Inst.mayReadFromMemory ()) {
723
+ hasOtherMemReadingInsts = true ;
721
724
}
722
725
}
723
726
@@ -885,11 +888,15 @@ void LoopTreeOptimization::analyzeCurrentLoop(
885
888
SmallVector<BeginAccessInst *, 8 > BeginAccesses;
886
889
SmallVector<FullApplySite, 8 > fullApplies;
887
890
891
+ // True if the loop has instructions which (may) read from memory, which are not
892
+ // in `Loads` and not in `sideEffects`.
893
+ bool hasOtherMemReadingInsts = false ;
894
+
888
895
for (auto *BB : Loop->getBlocks ()) {
889
896
SmallVector<SILInstruction *, 8 > sideEffectsInBlock;
890
897
for (auto &Inst : *BB) {
891
898
if (hasOwnershipOperandsOrResults (&Inst)) {
892
- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
899
+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
893
900
// Collect fullApplies to be checked in analyzeBeginAccess
894
901
if (auto fullApply = FullApplySite::isa (&Inst)) {
895
902
fullApplies.push_back (fullApply);
@@ -921,12 +928,12 @@ void LoopTreeOptimization::analyzeCurrentLoop(
921
928
}
922
929
Stores.push_back (store);
923
930
LoadsAndStores.push_back (&Inst);
924
- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
931
+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
925
932
break ;
926
933
}
927
934
case SILInstructionKind::BeginAccessInst:
928
935
BeginAccesses.push_back (cast<BeginAccessInst>(&Inst));
929
- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
936
+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
930
937
break ;
931
938
case SILInstructionKind::RefElementAddrInst:
932
939
SpecialHoist.push_back (cast<RefElementAddrInst>(&Inst));
@@ -937,7 +944,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
937
944
// cond_fail that would have protected (executed before) a memory access
938
945
// must - after hoisting - also be executed before said access.
939
946
HoistUp.insert (&Inst);
940
- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
947
+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
941
948
break ;
942
949
case SILInstructionKind::ApplyInst: {
943
950
auto *AI = cast<ApplyInst>(&Inst);
@@ -971,7 +978,7 @@ void LoopTreeOptimization::analyzeCurrentLoop(
971
978
}
972
979
}
973
980
974
- checkSideEffects (Inst, sideEffects, sideEffectsInBlock);
981
+ checkSideEffects (Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts );
975
982
if (canHoistUpDefault (&Inst, Loop, DomTree, RunsOnHighLevelSIL)) {
976
983
HoistUp.insert (&Inst);
977
984
}
@@ -1013,23 +1020,25 @@ void LoopTreeOptimization::analyzeCurrentLoop(
1013
1020
}
1014
1021
}
1015
1022
1016
- // Collect memory locations for which we can move all loads and stores out
1017
- // of the loop.
1018
- //
1019
- // Note: The Loads set and LoadsAndStores set may mutate during this loop.
1020
- for (StoreInst *SI : Stores) {
1021
- // Use AccessPathWithBase to recover a base address that can be used for
1022
- // newly inserted memory operations. If we instead teach hoistLoadsAndStores
1023
- // how to rematerialize global_addr, then we don't need this base.
1024
- auto access = AccessPathWithBase::compute (SI->getDest ());
1025
- auto accessPath = access.accessPath ;
1026
- if (accessPath.isValid () &&
1027
- (access.base && isLoopInvariant (access.base , Loop))) {
1028
- if (isOnlyLoadedAndStored (AA, sideEffects, Loads, Stores, SI->getDest (),
1029
- accessPath)) {
1030
- if (!LoadAndStoreAddrs.count (accessPath)) {
1031
- if (splitLoads (Loads, accessPath, SI->getDest ())) {
1032
- LoadAndStoreAddrs.insert (accessPath);
1023
+ if (!hasOtherMemReadingInsts) {
1024
+ // Collect memory locations for which we can move all loads and stores out
1025
+ // of the loop.
1026
+ //
1027
+ // Note: The Loads set and LoadsAndStores set may mutate during this loop.
1028
+ for (StoreInst *SI : Stores) {
1029
+ // Use AccessPathWithBase to recover a base address that can be used for
1030
+ // newly inserted memory operations. If we instead teach hoistLoadsAndStores
1031
+ // how to rematerialize global_addr, then we don't need this base.
1032
+ auto access = AccessPathWithBase::compute (SI->getDest ());
1033
+ auto accessPath = access.accessPath ;
1034
+ if (accessPath.isValid () &&
1035
+ (access.base && isLoopInvariant (access.base , Loop))) {
1036
+ if (isOnlyLoadedAndStored (AA, sideEffects, Loads, Stores, SI->getDest (),
1037
+ accessPath)) {
1038
+ if (!LoadAndStoreAddrs.count (accessPath)) {
1039
+ if (splitLoads (Loads, accessPath, SI->getDest ())) {
1040
+ LoadAndStoreAddrs.insert (accessPath);
1041
+ }
1033
1042
}
1034
1043
}
1035
1044
}
0 commit comments