Skip to content

SILOptimizer: fix non-deterministic behavior in RedundantLoadElimination and DeadStoreElimination. #19648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions include/swift/SILOptimizer/Utils/LoadStoreOptUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,6 @@ static inline llvm::hash_code hash_value(const LSValue &V) {
//===----------------------------------------------------------------------===//
// Load Store Location
//===----------------------------------------------------------------------===//
using LSLocationSet = llvm::DenseSet<LSLocation>;
using LSLocationList = llvm::SmallVector<LSLocation, 8>;
using LSLocationIndexMap = llvm::SmallDenseMap<LSLocation, unsigned, 32>;
using LSLocationBaseMap = llvm::DenseMap<SILValue, LSLocation>;
Expand Down Expand Up @@ -357,7 +356,7 @@ class LSLocation : public LSBase {

/// Given a set of locations derived from the same base, try to merge/reduce
/// them into smallest number of LSLocations possible.
static bool reduce(LSLocation Base, SILModule *Mod, LSLocationSet &Locs);
static void reduce(LSLocation Base, SILModule *Mod, LSLocationList &Locs);

/// Enumerate the given Mem LSLocation.
static void enumerateLSLocation(SILModule *M, SILValue Mem,
Expand Down
10 changes: 5 additions & 5 deletions lib/SILOptimizer/Transforms/DeadStoreElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ class BlockState {
SmallBitVector BBDeallocateLocation;

/// The dead stores in the current basic block.
llvm::DenseSet<SILInstruction *> DeadStores;
llvm::SmallVector<SILInstruction *, 2> DeadStores;

/// Keeps track of what stores to generate after the data flow stabilizes.
/// these stores come from partial dead stores.
Expand Down Expand Up @@ -946,22 +946,22 @@ void DSEContext::processWrite(SILInstruction *I, SILValue Val, SILValue Mem,
// instruction is dead.
if (Dead) {
LLVM_DEBUG(llvm::dbgs() << "Instruction Dead: " << *I << "\n");
S->DeadStores.insert(I);
S->DeadStores.push_back(I);
++NumDeadStores;
return;
}

// Partial dead store - stores to some locations are dead, but not all. This
// is a partially dead store. Also at this point we know what locations are
// dead.
llvm::DenseSet<LSLocation> Alives;
LSLocationList Alives;
if (V.any()) {
// Take out locations that are dead.
for (unsigned i = 0; i < V.size(); ++i) {
if (V.test(i))
continue;
// This location is alive.
Alives.insert(Locs[i]);
Alives.push_back(Locs[i]);
}

// Try to create as few aggregated stores as possible out of the locations.
Expand Down Expand Up @@ -994,7 +994,7 @@ void DSEContext::processWrite(SILInstruction *I, SILValue Val, SILValue Mem,

// Lastly, mark the old store as dead.
LLVM_DEBUG(llvm::dbgs() << "Instruction Partially Dead: " << *I << "\n");
S->DeadStores.insert(I);
S->DeadStores.push_back(I);
++NumPartialDeadStores;
}
}
Expand Down
16 changes: 7 additions & 9 deletions lib/SILOptimizer/Transforms/RedundantLoadElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,6 @@ class BlockState {

namespace {

using BBValueMap = llvm::DenseMap<SILBasicBlock *, SILValue>;

/// This class stores global state that we use when computing redundant load and
/// their replacement in each basic block.
class RLEContext {
Expand Down Expand Up @@ -1248,7 +1246,7 @@ BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx,

SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
LSLocation &L) {
BBValueMap Values;
llvm::SmallVector<std::pair<SILBasicBlock *, SILValue>, 8> Values;
llvm::DenseSet<SILBasicBlock *> HandledBBs;
llvm::SmallVector<SILBasicBlock *, 8> WorkList;

Expand Down Expand Up @@ -1277,7 +1275,7 @@ SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
// locations, collect and reduce them into a single value in the current
// basic block.
if (Forwarder.isConcreteValues(*this, L)) {
Values[CurBB] = Forwarder.reduceValuesAtEndOfBlock(*this, L);
Values.push_back({CurBB, Forwarder.reduceValuesAtEndOfBlock(*this, L)});
continue;
}

Expand All @@ -1301,7 +1299,7 @@ SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,

// Reduce the available values into a single SILValue we can use to forward
SILInstruction *IPt = CurBB->getTerminator();
Values[CurBB] = LSValue::reduce(L, &BB->getModule(), LSValues, IPt);
Values.push_back({CurBB, LSValue::reduce(L, &BB->getModule(), LSValues, IPt)});
}

// Finally, collect all the values for the SILArgument, materialize it using
Expand All @@ -1317,7 +1315,7 @@ SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L,
LSLocationValueMap &Values,
ValueTableMap &VM) {
LSLocationSet CSLocs;
LSLocationList CSLocs;
LSLocationList Locs;
LSLocation::expand(L, &BB->getModule(), Locs, TE);

Expand All @@ -1328,7 +1326,7 @@ bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L,
Values[X] = getValue(VM[getLocationBit(X)]);
if (!Values[X].isCoveringValue())
continue;
CSLocs.insert(X);
CSLocs.push_back(X);
}

// For locations which we do not have concrete values for in this basic
Expand Down Expand Up @@ -1563,7 +1561,7 @@ bool RLEContext::run() {
processBasicBlocksForRLE(Optimistic);

// Finally, perform the redundant load replacements.
llvm::DenseSet<SILInstruction *> InstsToDelete;
llvm::SmallVector<SILInstruction *, 16> InstsToDelete;
bool SILChanged = false;
for (auto &B : *Fn) {
auto &State = BBToLocState[&B];
Expand All @@ -1587,7 +1585,7 @@ bool RLEContext::run() {
<< "With " << Iter->second);
SILChanged = true;
Iter->first->replaceAllUsesWith(Iter->second);
InstsToDelete.insert(Iter->first);
InstsToDelete.push_back(Iter->first);
++NumForwardedLoads;
}
}
Expand Down
4 changes: 2 additions & 2 deletions lib/SILOptimizer/UtilityPasses/LSLocationPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ class LSLocationPrinter : public SILModuleTransform {
void printMemReduction(SILFunction &Fn) {
LSLocation L;
LSLocationList Locs;
llvm::DenseSet<LSLocation> SLocs;
LSLocationList SLocs;
unsigned Counter = 0;
for (auto &BB : Fn) {
for (auto &II : BB) {
Expand Down Expand Up @@ -212,7 +212,7 @@ class LSLocationPrinter : public SILModuleTransform {
// Reduction should not care about the order of the memory locations in
// the set.
for (auto I = Locs.begin(); I != Locs.end(); ++I) {
SLocs.insert(*I);
SLocs.push_back(*I);
}

// This should get the original (unexpanded) location back.
Expand Down
64 changes: 45 additions & 19 deletions lib/SILOptimizer/Utils/LoadStoreOptUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,36 +185,62 @@ LSLocation::expand(LSLocation Base, SILModule *M, LSLocationList &Locs,
}
}

bool
LSLocation::reduce(LSLocation Base, SILModule *M, LSLocationSet &Locs) {
/// Gets the sub-locations of \p Base in \p SubLocations.
/// Returns false if this is not possible or too complex.
static bool
getSubLocations(LSLocationList &SubLocations, LSLocation Base, SILModule *M,
const LSLocationList &Locs) {
// If this is a class reference type, we have reached end of the type tree.
if (Base.getType(M).getClassOrBoundGenericClass())
return Locs.find(Base) != Locs.end();
return false;

// This a don't expand node.
if (!shouldExpand(*M, Base.getType(M))) {
return Locs.find(Base) != Locs.end();
// Don't expand if it would be too complex. As Locs is a list (and not a set)
// we want to avoid quadratic complexity in replaceInner().
// Usually Locs is small anyway, because we limit expansion to 6 members.
// But with deeply nested types we could run in a corner case where Locs is
// large.
if (!shouldExpand(*M, Base.getType(M)) || Locs.size() >= 8) {
return false;
}

// This is a leaf node.
LSLocationList NextLevel;
Base.getNextLevelLSLocations(NextLevel, M);
if (NextLevel.empty())
return Locs.find(Base) != Locs.end();
Base.getNextLevelLSLocations(SubLocations, M);
return !SubLocations.empty();
}

// This is not a leaf node, try to find whether all its children are alive.
/// Replaces \p SubLocations with \p Base in \p Locs if all sub-locations are
/// alive, i.e. present in \p Locs.
static bool
replaceSubLocations(LSLocation Base, SILModule *M, LSLocationList &Locs,
const LSLocationList &SubLocations) {
// Find whether all its children of Base are alive.
bool Alive = true;
for (auto &X : NextLevel) {
Alive &= LSLocation::reduce(X, M, Locs);
for (auto &X : SubLocations) {
// Recurse into the next level.
LSLocationList NextInnerLevel;
if (getSubLocations(NextInnerLevel, X, M, Locs)) {
Alive &= replaceSubLocations(X, M, Locs, NextInnerLevel);
} else {
Alive &= is_contained(Locs, X);
}
}

// All next level locations are alive, create the new aggregated location.
if (Alive) {
for (auto &X : NextLevel)
Locs.erase(X);
Locs.insert(Base);
}
return Alive;
if (!Alive)
return false;

auto newEnd = std::remove_if(Locs.begin(), Locs.end(), [&](const LSLocation &L) {
return is_contained(SubLocations, L);
});
Locs.erase(newEnd, Locs.end());
Locs.push_back(Base);
return true;
}

void LSLocation::reduce(LSLocation Base, SILModule *M, LSLocationList &Locs) {
LSLocationList SubLocations;
if (getSubLocations(SubLocations, Base, M, Locs))
replaceSubLocations(Base, M, Locs, SubLocations);
}

void
Expand Down