Skip to content

Commit ced5f4a

Browse files
committed
[pmo] Teach PMO that in ossa, load [take] invalidates an available value.
Example would be a case where we dynamically control if the load [take] occurs and the load [take] happens with conditional control flow. This was exposed by the throwing_initializer and failable_initializer interpreter tests in the test of PolarBear(n:before:during).
1 parent f243bc5 commit ced5f4a

File tree

2 files changed

+385
-50
lines changed

2 files changed

+385
-50
lines changed

lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp

Lines changed: 207 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ static unsigned computeSubelement(SILValue Pointer,
147147
continue;
148148
}
149149

150-
150+
// This fails when we visit unchecked_take_enum_data_addr. We should just
151+
// add support for enums.
151152
assert(isa<InitExistentialAddrInst>(Pointer) &&
152153
"Unknown access path instruction");
153154
// Cannot promote loads and stores from within an existential projection.
@@ -233,7 +234,7 @@ struct AvailableValue {
233234
InsertionPoints.set_union(Other.InsertionPoints);
234235
}
235236

236-
void addInsertionPoint(StoreInst *I) & { InsertionPoints.insert(I); }
237+
void addInsertionPoint(StoreInst *si) & { InsertionPoints.insert(si); }
237238

238239
AvailableValue emitStructExtract(SILBuilder &B, SILLocation Loc, VarDecl *D,
239240
unsigned SubElementNumber) const {
@@ -845,8 +846,18 @@ AvailableValueAggregator::addMissingDestroysForCopiedValues(
845846
namespace {
846847

847848
/// Given a piece of memory, the memory's uses, and destroys perform a single
848-
/// round of optimistic dataflow switching to intersection when a back edge is
849-
/// encountered.
849+
/// round of semi-optimistic backwards dataflow for each use. The result is the
850+
/// set of available values that reach the specific use of the field in the
851+
/// allocated object.
852+
///
853+
/// The general form of the algorithm is that in our constructor, we analyze our
854+
/// uses and determine available values. Then users call computeAvailableValues
855+
/// which looks backwards up the control flow graph for available values that we
856+
/// can use.
857+
///
858+
/// NOTE: The reason why we say that the algorithm is semi-optimistic is that we
859+
/// assume that all incoming elements into a loopheader will be the same. If we
860+
/// find a conflict, we record it and fail.
850861
class AvailableValueDataflowContext {
851862
/// The base memory we are performing dataflow upon.
852863
AllocationInst *TheMemory;
@@ -862,9 +873,21 @@ class AvailableValueDataflowContext {
862873
/// The set of blocks with local definitions.
863874
///
864875
/// We use this to determine if we should visit a block or look at a block's
865-
/// predecessors during dataflow.
876+
/// predecessors during dataflow for an available value.
866877
llvm::SmallPtrSet<SILBasicBlock *, 32> HasLocalDefinition;
867878

879+
/// The set of blocks that have definitions which specifically "kill" the
880+
/// given value. If a block is in this set, there must be an instruction in
881+
/// LoadTakeUse whose parent is the block. This is just used to speed up
882+
/// computation.
883+
///
884+
/// NOTE: These are not considered escapes.
885+
llvm::SmallPtrSet<SILBasicBlock *, 32> HasLocalKill;
886+
887+
/// This is a set of load takes that we are tracking. HasLocalKill is the set
888+
/// of parent blocks of these instructions.
889+
llvm::SmallPtrSet<SILInstruction *, 8> LoadTakeUses;
890+
868891
/// This is a map of uses that are not loads (i.e., they are Stores,
869892
/// InOutUses, and Escapes), to their entry in Uses.
870893
llvm::SmallDenseMap<SILInstruction *, unsigned, 16> NonLoadUses;
@@ -925,10 +948,36 @@ AvailableValueDataflowContext::AvailableValueDataflowContext(
925948
auto &Use = Uses[ui];
926949
assert(Use.Inst && "No instruction identified?");
927950

928-
// Keep track of all the uses that aren't loads.
929-
if (Use.Kind == PMOUseKind::Load)
930-
continue;
951+
// If we have a load...
952+
if (Use.Kind == PMOUseKind::Load) {
953+
// Skip load borrow use and open_existential_addr.
954+
if (isa<LoadBorrowInst>(Use.Inst) || isa<OpenExistentialAddrInst>(Use.Inst))
955+
continue;
956+
957+
// That is not a load take, continue. Otherwise, stash the load [take].
958+
if (auto *LI = dyn_cast<LoadInst>(Use.Inst)) {
959+
if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
960+
LoadTakeUses.insert(LI);
961+
HasLocalKill.insert(LI->getParent());
962+
}
963+
continue;
964+
}
965+
966+
// If we have a copy_addr as our load, it means we are processing a source
967+
// of the value. If the copy_addr is taking from the source, we need to
968+
// treat it like a load take use.
969+
if (auto *CAI = dyn_cast<CopyAddrInst>(Use.Inst)) {
970+
if (CAI->isTakeOfSrc() == IsTake) {
971+
LoadTakeUses.insert(CAI);
972+
HasLocalKill.insert(CAI->getParent());
973+
}
974+
continue;
975+
}
976+
977+
llvm_unreachable("Unhandled SILInstructionKind for PMOUseKind::Load?!");
978+
}
931979

980+
// Keep track of all the uses that aren't loads.
932981
NonLoadUses[Use.Inst] = ui;
933982
HasLocalDefinition.insert(Use.Inst->getParent());
934983

@@ -945,50 +994,149 @@ AvailableValueDataflowContext::AvailableValueDataflowContext(
945994
HasLocalDefinition.insert(TheMemory->getParent());
946995
}
947996

948-
void AvailableValueDataflowContext::updateAvailableValues(
949-
SILInstruction *Inst, SmallBitVector &RequiredElts,
950-
SmallVectorImpl<AvailableValue> &Result,
951-
SmallBitVector &ConflictingValues) {
952-
// Handle store.
953-
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
954-
unsigned StartSubElt = computeSubelement(SI->getDest(), TheMemory);
955-
assert(StartSubElt != ~0U && "Store within enum projection not handled");
956-
SILType ValTy = SI->getSrc()->getType();
997+
// This function takes in the current (potentially uninitialized) available
998+
// values for theMemory and for the subset of AvailableValues corresponding to
999+
// \p address either:
1000+
//
1001+
// 1. If uninitialized, optionally initialize the available value with a new
1002+
// SILValue. It is optional since in certain cases, (for instance when
1003+
// invalidating one just wants to skip empty available values).
1004+
//
1005+
// 2. Given an initialized value, either add the given instruction as an
1006+
// insertion point or state that we have a conflict.
1007+
static inline void updateAvailableValuesHelper(
1008+
SingleValueInstruction *theMemory, SILInstruction *inst, SILValue address,
1009+
SmallBitVector &requiredElts, SmallVectorImpl<AvailableValue> &result,
1010+
SmallBitVector &conflictingValues,
1011+
function_ref<Optional<AvailableValue>(unsigned)> defaultFunc,
1012+
function_ref<bool(AvailableValue &, unsigned)> isSafeFunc) {
1013+
auto &mod = theMemory->getModule();
1014+
unsigned startSubElt = computeSubelement(address, theMemory);
1015+
1016+
// TODO: Is this needed now?
1017+
assert(startSubElt != ~0U && "Store within enum projection not handled");
1018+
for (unsigned i :
1019+
range(getNumSubElements(address->getType().getObjectType(), mod))) {
1020+
// If this element is not required, don't fill it in.
1021+
if (!requiredElts[startSubElt + i])
1022+
continue;
9571023

958-
for (unsigned i : range(getNumSubElements(ValTy, getModule()))) {
959-
// If this element is not required, don't fill it in.
960-
if (!RequiredElts[StartSubElt+i]) continue;
1024+
// At this point we know that we will either mark the value as conflicting
1025+
// or give it a value.
1026+
requiredElts[startSubElt + i] = false;
1027+
1028+
// First see if we have an entry at all.
1029+
auto &entry = result[startSubElt + i];
1030+
1031+
// If we don't...
1032+
if (!entry) {
1033+
// and we are told to initialize it, do so.
1034+
if (auto defaultValue = defaultFunc(i)) {
1035+
entry = std::move(defaultValue.getValue());
1036+
} else {
1037+
// Otherwise, mark this as a conflicting value. There is some available
1038+
// value here, we just do not know what it is at this point. This
1039+
// ensures that if we visit a kill where we do not have an entry yet, we
1040+
// properly invalidate our state.
1041+
conflictingValues[startSubElt + i] = true;
1042+
}
1043+
continue;
1044+
}
9611045

962-
// This element is now provided.
963-
RequiredElts[StartSubElt + i] = false;
1046+
// Check if our caller thinks that the value currently in entry is
1047+
// compatible with \p inst. If not, mark the values as conflicting and
1048+
// continue.
1049+
if (!isSafeFunc(entry, i)) {
1050+
conflictingValues[startSubElt + i] = true;
1051+
continue;
1052+
}
9641053

965-
// If there is no result computed for this subelement, record it. If
966-
// there already is a result, check it for conflict. If there is no
967-
// conflict, then we're ok.
968-
auto &Entry = Result[StartSubElt+i];
969-
if (!Entry) {
970-
Entry = {SI->getSrc(), i, SI};
971-
continue;
972-
}
1054+
// Otherwise, we found another insertion point for our available
1055+
// value. Today this will always be a Store.
1056+
entry.addInsertionPoint(cast<StoreInst>(inst));
1057+
}
1058+
}
9731059

974-
// TODO: This is /really/, /really/, conservative. This basically means
975-
// that if we do not have an identical store, we will not promote.
976-
if (Entry.getValue() != SI->getSrc() ||
977-
Entry.getSubElementNumber() != i) {
978-
ConflictingValues[StartSubElt + i] = true;
979-
continue;
980-
}
1060+
void AvailableValueDataflowContext::updateAvailableValues(
1061+
SILInstruction *Inst, SmallBitVector &RequiredElts,
1062+
SmallVectorImpl<AvailableValue> &Result,
1063+
SmallBitVector &ConflictingValues) {
9811064

982-
Entry.addInsertionPoint(SI);
1065+
// If we are visiting a load [take], it invalidates the underlying available
1066+
// values.
1067+
//
1068+
// NOTE: Since we are always looking back from the instruction to promote,
1069+
// when we attempt to promote the load [take] itself, we will never hit this
1070+
// code since.
1071+
if (auto *LI = dyn_cast<LoadInst>(Inst)) {
1072+
// First see if this is a load inst that we are tracking.
1073+
if (LoadTakeUses.count(LI)) {
1074+
updateAvailableValuesHelper(TheMemory, LI, LI->getOperand(), RequiredElts,
1075+
Result, ConflictingValues,
1076+
/*default*/
1077+
[](unsigned) -> Optional<AvailableValue> {
1078+
// We never initialize values. We only
1079+
// want to invalidate.
1080+
return None;
1081+
},
1082+
/*isSafe*/
1083+
[](AvailableValue &, unsigned) -> bool {
1084+
// Always assume values conflict.
1085+
return false;
1086+
});
1087+
return;
9831088
}
1089+
}
9841090

1091+
// Handle store.
1092+
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
1093+
updateAvailableValuesHelper(
1094+
TheMemory, SI, SI->getDest(), RequiredElts, Result, ConflictingValues,
1095+
/*default*/
1096+
[&](unsigned ResultOffset) -> Optional<AvailableValue> {
1097+
Optional<AvailableValue> Result;
1098+
Result.emplace(SI->getSrc(), ResultOffset, SI);
1099+
return Result;
1100+
},
1101+
/*isSafe*/
1102+
[&](AvailableValue &Entry, unsigned ResultOffset) -> bool {
1103+
// TODO: This is /really/, /really/, conservative. This basically
1104+
// means that if we do not have an identical store, we will not
1105+
// promote.
1106+
return Entry.getValue() == SI->getSrc() &&
1107+
Entry.getSubElementNumber() == ResultOffset;
1108+
});
9851109
return;
9861110
}
987-
988-
// If we get here with a copy_addr, it must be storing into the element. Check
989-
// to see if any loaded subelements are being used, and if so, explode the
990-
// copy_addr to its individual pieces.
1111+
1112+
// If we got here from an apply, we must either be initializing the element
1113+
// via an @out parameter or we are trying to model an invalidating load of the
1114+
// value (e.x.: indirect_in, indirect_inout).
1115+
1116+
// If we get here with a copy_addr, we must either be storing into the element
1117+
// or tracking some sort of take of the src. First check if we are taking (in
1118+
// which case, we just track invalidation of src) and continue. Otherwise we
1119+
// must be storing into the copy_addr so see which loaded subelements are
1120+
// being used, and if so, explode the copy_addr to its individual pieces.
9911121
if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
1122+
// If we have a load take use, we must be tracking a store of CAI.
1123+
if (LoadTakeUses.count(CAI)) {
1124+
updateAvailableValuesHelper(TheMemory, CAI, CAI->getSrc(), RequiredElts,
1125+
Result, ConflictingValues,
1126+
/*default*/
1127+
[](unsigned) -> Optional<AvailableValue> {
1128+
// We never give values default initialized
1129+
// values. We only want to invalidate.
1130+
return None;
1131+
},
1132+
/*isSafe*/
1133+
[](AvailableValue &, unsigned) -> bool {
1134+
// Always assume values conflict.
1135+
return false;
1136+
});
1137+
return;
1138+
}
1139+
9921140
unsigned StartSubElt = computeSubelement(CAI->getDest(), TheMemory);
9931141
assert(StartSubElt != ~0U && "Store within enum projection not handled");
9941142
SILType ValTy = CAI->getDest()->getType();
@@ -1082,20 +1230,21 @@ void AvailableValueDataflowContext::computeAvailableValuesFrom(
10821230
&VisitedBlocks,
10831231
SmallBitVector &ConflictingValues) {
10841232
assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");
1085-
1233+
10861234
// If there is a potential modification in the current block, scan the block
1087-
// to see if the store or escape is before or after the load. If it is
1088-
// before, check to see if it produces the value we are looking for.
1089-
if (HasLocalDefinition.count(BB)) {
1235+
// to see if the store, escape, or load [take] is before or after the load. If
1236+
// it is before, check to see if it produces the value we are looking for.
1237+
bool shouldCheckBlock =
1238+
HasLocalDefinition.count(BB) || HasLocalKill.count(BB);
1239+
if (shouldCheckBlock) {
10901240
for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
10911241
SILInstruction *TheInst = &*std::prev(BBI);
1092-
10931242
// If this instruction is unrelated to the element, ignore it.
1094-
if (!NonLoadUses.count(TheInst)) {
1243+
if (!NonLoadUses.count(TheInst) && !LoadTakeUses.count(TheInst)) {
10951244
--BBI;
10961245
continue;
10971246
}
1098-
1247+
10991248
// Given an interesting instruction, incorporate it into the set of
11001249
// results, and filter down the list of demanded subelements that we still
11011250
// need.
@@ -1112,7 +1261,6 @@ void AvailableValueDataflowContext::computeAvailableValuesFrom(
11121261
}
11131262
}
11141263

1115-
11161264
// Otherwise, we need to scan up the CFG looking for available values.
11171265
for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
11181266
SILBasicBlock *PredBB = *PI;
@@ -1174,6 +1322,9 @@ void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) {
11741322

11751323
// Update our internal state for this being gone.
11761324
NonLoadUses.erase(CAI);
1325+
LoadTakeUses.erase(CAI);
1326+
// NOTE: We do not need to update HasLocalKill since the copy_addr
1327+
// and the loads/stores will have the same parent block.
11771328

11781329
// Remove the copy_addr from Uses. A single copy_addr can appear multiple
11791330
// times if the source and dest are to elements within a single aggregate, but
@@ -1237,6 +1388,12 @@ void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) {
12371388
// "assign" operation on the destination of the copyaddr.
12381389
if (LoadUse.isValid() &&
12391390
getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
1391+
if (auto *LI = dyn_cast<LoadInst>(NewInst)) {
1392+
if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
1393+
LoadTakeUses.insert(LI);
1394+
HasLocalKill.insert(LI->getParent());
1395+
}
1396+
}
12401397
LoadUse.Inst = NewInst;
12411398
Uses.push_back(LoadUse);
12421399
}

0 commit comments

Comments
 (0)