Commit b139057

Merge pull request #3613 from fhahn/dse-mssa-opt-2
Pick DSE changes to eliminate redundant stores.
2 parents 7ced924 + e163436 commit b139057
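
For orientation, a minimal sketch of the kind of redundancy this merge targets (hypothetical C++ example, not taken from the patch or its tests): the second store rewrites the value that is already present at the location, so DSE can now delete it as a no-op store.

    void redundant(int *P) {
      *P = 42; // establishes the value at *P
      *P = 42; // stores the value already present; removable as a no-op store
    }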

File tree

2 files changed: +219 −37 lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 116 additions & 8 deletions
@@ -159,6 +159,16 @@ static cl::opt<unsigned> MemorySSAPathCheckLimit(
     cl::desc("The maximum number of blocks to check when trying to prove that "
              "all paths to an exit go through a killing block (default = 50)"));
 
+// This flag allows or disallows DSE to optimize MemorySSA during its
+// traversal. Note that DSE optimizing MemorySSA may impact other passes
+// downstream of the DSE invocation and can lead to issues not being
+// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
+// those cases, the flag can be used to check if DSE's MemorySSA optimizations
+// impact follow-up passes.
+static cl::opt<bool>
+    OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
+                      cl::desc("Allow DSE to optimize memory accesses."));
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
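
A usage note (an assumption about the surrounding tooling, not part of the diff): since OptimizeMemorySSA is a hidden cl::opt, it should be toggleable when running the pass through opt, e.g. -dse-optimize-memoryssa=false, to check whether a downstream difference is caused by DSE's MemorySSA updates rather than by DSE itself.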
@@ -329,6 +339,7 @@ enum OverwriteResult {
   OW_End,
   OW_PartialEarlierWithFullLater,
   OW_MaybePartial,
+  OW_None,
   OW_Unknown
 };

@@ -944,6 +955,7 @@ struct DSEState {
   /// Return OW_MaybePartial if \p KillingI does not completely overwrite
   /// \p DeadI, but they both write to the same underlying object. In that
   /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite
   /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
   OverwriteResult isOverwrite(const Instruction *KillingI,
                               const Instruction *DeadI,
@@ -1006,8 +1018,16 @@ struct DSEState {
 
     // If we can't resolve the same pointers to the same object, then we can't
     // analyze them at all.
-    if (DeadUndObj != KillingUndObj)
+    if (DeadUndObj != KillingUndObj) {
+      // Non-aliasing stores to different objects don't overlap. Note that if
+      // the killing store is known to overwrite the whole object (an
+      // out-of-bounds access overwrites the whole object as well), then it is
+      // assumed to completely overwrite any store to the same object, even if
+      // they don't actually alias (see next check).
+      if (AAR == AliasResult::NoAlias)
+        return OW_None;
       return OW_Unknown;
+    }
 
     // If the KillingI store is to a recognizable object, get its size.
     uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
@@ -1061,9 +1081,8 @@ struct DSEState {
       return OW_MaybePartial;
     }
 
-    // Can reach here only if accesses are known not to overlap. There is no
-    // dedicated code to indicate no overlap so signal "unknown".
-    return OW_Unknown;
+    // Can reach here only if accesses are known not to overlap.
+    return OW_None;
   }
 
   bool isInvisibleToCallerAfterRet(const Value *V) {
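
To make the new result concrete, here is a hedged illustration (invented example; the real queries run on IR memory locations): for stores into provably disjoint ranges of the same underlying object, isOverwrite can now answer "known not to overlap" with OW_None instead of falling back to OW_Unknown.

    struct Pair { int X, Y; };
    void fields(Pair *P) {
      P->X = 1; // queried as the earlier (dead) write against the store below
      P->Y = 2; // disjoint offset and size within *P: isOverwrite -> OW_None
    }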
@@ -1354,6 +1373,15 @@ struct DSEState {
     Instruction *KillingI = KillingDef->getMemoryInst();
     LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");
 
+    // Only optimize the defining access of KillingDef when directly starting
+    // at its defining access. The defining access must also only access
+    // KillingLoc. At the moment we only support instructions with a single
+    // write location, so it should be sufficient to disable optimizations for
+    // instructions that also read from memory.
+    bool CanOptimize = OptimizeMemorySSA &&
+                       KillingDef->getDefiningAccess() == StartAccess &&
+                       !KillingI->mayReadFromMemory();
+
     // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
     Optional<MemoryLocation> CurrentLoc;
     for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
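
One consequence of the mayReadFromMemory() guard, shown with a hypothetical example: an instruction like memcpy both reads and writes memory, so when it is the killing def, CanOptimize starts out false and its MemoryDef is never marked as optimized.

    #include <cstring>
    void copyThenStore(char *Dst, const char *Src) {
      std::memcpy(Dst, Src, 8); // reads and writes: mayReadFromMemory() is true
      Dst[0] = 1;               // a plain store stays eligible for optimization
    }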
@@ -1395,8 +1423,10 @@ struct DSEState {
       Instruction *CurrentI = CurrentDef->getMemoryInst();
 
       if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
-                     TLI))
+                     TLI)) {
+        CanOptimize = false;
         continue;
+      }
 
       // Before we try to remove anything, check for any extra throwing
       // instructions that block us from DSEing
@@ -1437,32 +1467,51 @@ struct DSEState {
 
       // If Current does not have an analyzable write location, skip it
       CurrentLoc = getLocForWriteEx(CurrentI);
-      if (!CurrentLoc)
+      if (!CurrentLoc) {
+        CanOptimize = false;
         continue;
+      }
 
       // AliasAnalysis does not account for loops. Limit elimination to
       // candidates for which we can guarantee they always store to the same
       // memory location and are not located in different loops.
       if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
         LLVM_DEBUG(dbgs() << "  ... not guaranteed loop independent\n");
         WalkerStepLimit -= 1;
+        CanOptimize = false;
         continue;
       }
 
       if (IsMemTerm) {
         // If the killing def is a memory terminator (e.g. lifetime.end), check
         // the next candidate if Current does not write the same underlying
         // object as the terminator.
-        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
+        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
+          CanOptimize = false;
           continue;
+        }
       } else {
         int64_t KillingOffset = 0;
         int64_t DeadOffset = 0;
         auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
                               KillingOffset, DeadOffset);
+        if (CanOptimize) {
+          // CurrentDef is the earliest write clobber of KillingDef. Use it as
+          // the optimized access. Do not optimize if CurrentDef is already the
+          // defining access of KillingDef.
+          if (CurrentDef != KillingDef->getDefiningAccess() &&
+              (OR == OW_Complete || OR == OW_MaybePartial))
+            KillingDef->setOptimized(CurrentDef);
+
+          // Once a may-aliasing def is encountered, do not set an optimized
+          // access.
+          if (OR != OW_None)
+            CanOptimize = false;
+        }
+
        // If Current does not write to the same object as KillingDef, check
        // the next candidate.
-        if (OR == OW_Unknown)
+        if (OR == OW_Unknown || OR == OW_None)
          continue;
        else if (OR == OW_MaybePartial) {
          // If KillingDef only partially overwrites Current, check the next
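
A hedged sketch of what the caching buys (identifiers from the patch, scenario invented): once the walk proves an earlier store is the earliest write clobber of the killing store, setOptimized records it on the MemoryDef, and the redundant-store elimination added further down can retrieve it via getOptimized() without another MemorySSA walk.

    void cached() {
      int A = 0, B = 0;
      A = 1; // earliest write clobber of the final store to A
      B = 2; // provably no-alias with A (OW_None), so the walk steps over it
      A = 3; // killing def; its MemoryDef's optimized access becomes "A = 1"
    }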
@@ -1471,6 +1520,7 @@ struct DSEState {
         // which are less likely to be removable in the end.
         if (PartialLimit <= 1) {
           WalkerStepLimit -= 1;
+          LLVM_DEBUG(dbgs() << "   ... reached partial limit ... continue with next access\n");
           continue;
         }
         PartialLimit -= 1;
@@ -1941,6 +1991,63 @@ struct DSEState {
 
     return false;
   }
+
+  /// Eliminates writes to locations where the value that is being written
+  /// is already stored at the same location.
+  bool eliminateRedundantStoresOfExistingValues() {
+    bool MadeChange = false;
+    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
+                         "already existing value\n");
+    for (auto *Def : MemDefs) {
+      if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
+          !isRemovable(Def->getMemoryInst()))
+        continue;
+      MemoryDef *UpperDef;
+      // To conserve compile-time, we avoid walking to the next clobbering def.
+      // Instead, we just try to get the optimized access, if it exists. DSE
+      // will try to optimize defs during the earlier traversal.
+      if (Def->isOptimized())
+        UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
+      else
+        UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+      if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
+        continue;
+
+      Instruction *DefInst = Def->getMemoryInst();
+      Instruction *UpperInst = UpperDef->getMemoryInst();
+      auto IsRedundantStore = [this, DefInst,
+                               UpperInst](MemoryLocation UpperLoc) {
+        if (DefInst->isIdenticalTo(UpperInst))
+          return true;
+        if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
+          if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
+            auto MaybeDefLoc = getLocForWriteEx(DefInst);
+            if (!MaybeDefLoc)
+              return false;
+            int64_t InstWriteOffset = 0;
+            int64_t DepWriteOffset = 0;
+            auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
+                                  InstWriteOffset, DepWriteOffset);
+            Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
+            return StoredByte && StoredByte == MemSetI->getOperand(1) &&
+                   OR == OW_Complete;
+          }
+        }
+        return false;
+      };
+
+      auto MaybeUpperLoc = getLocForWriteEx(UpperInst);
+      if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) ||
+          isReadClobber(*MaybeUpperLoc, DefInst))
+        continue;
+      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *DefInst
+                        << '\n');
+      deleteDeadInstruction(DefInst);
+      NumRedundantStores++;
+      MadeChange = true;
+    }
+    return MadeChange;
+  }
 };
 
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
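
A hedged C++ analogue of the MemSetInst arm of IsRedundantStore (invented example; the actual checks operate on IR): a byte store that repeats the pattern a covering memset already wrote is a no-op, so the new method deletes it.

    #include <cstring>
    void zeroed(char *Buf) {
      std::memset(Buf, 0, 16); // UpperDef: every byte of Buf[0..15] is now 0
      Buf[4] = 0;              // Def: bytewise value matches the memset byte and
                               // the memset fully covers it -> store removed
    }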
@@ -2106,6 +2213,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
   for (auto &KV : State.IOLs)
     MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
 
+  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
   MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
   return MadeChange;
 }
