@@ -159,6 +159,16 @@ static cl::opt<unsigned> MemorySSAPathCheckLimit(
    cl::desc("The maximum number of blocks to check when trying to prove that "
             "all paths to an exit go through a killing block (default = 50)"));

+// This flag allows or disallows DSE to optimize MemorySSA during its
+// traversal. Note that DSE optimizing MemorySSA may impact other passes
+// downstream of the DSE invocation and can lead to issues not being
+// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
+// those cases, the flag can be used to check if DSE's MemorySSA optimizations
+// impact follow-up passes.
+static cl::opt<bool>
+    OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
+                      cl::desc("Allow DSE to optimize memory accesses."));
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
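For context, the new flag is an ordinary hidden cl::opt<bool>, so it can be flipped on the opt command line. A hypothetical lit-style RUN line pair exercising both settings could look like this (the test file and FileCheck setup are illustrative, not part of this commit):

    ; RUN: opt -passes=dse -S %s | FileCheck %s
    ; RUN: opt -passes=dse -dse-optimize-memoryssa=false -S %s | FileCheck %s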
@@ -329,6 +339,7 @@ enum OverwriteResult {
  OW_End,
  OW_PartialEarlierWithFullLater,
  OW_MaybePartial,
+  OW_None,
  OW_Unknown
};
@@ -944,6 +955,7 @@ struct DSEState {
  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  /// \p DeadI, but they both write to the same underlying object. In that
  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+  /// \p DeadI. Returns 'OW_None' if \p KillingI is known not to overwrite the
  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
  OverwriteResult isOverwrite(const Instruction *KillingI,
                              const Instruction *DeadI,
@@ -1006,8 +1018,16 @@ struct DSEState {

    // If we can't resolve the same pointers to the same object, then we can't
    // analyze them at all.
-    if (DeadUndObj != KillingUndObj)
+    if (DeadUndObj != KillingUndObj) {
+      // Non-aliasing stores to different objects don't overlap. Note that
+      // if the killing store is known to overwrite the whole object (an
+      // out-of-bounds access overwrites the whole object as well), it is
+      // assumed to completely overwrite any store to the same object, even
+      // if the two don't actually alias (see the next check).
+      if (AAR == AliasResult::NoAlias)
+        return OW_None;
      return OW_Unknown;
+    }

    // If the KillingI store is to a recognizable object, get its size.
    uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
@@ -1061,9 +1081,8 @@ struct DSEState {
      return OW_MaybePartial;
    }

-    // Can reach here only if accesses are known not to overlap. There is no
-    // dedicated code to indicate no overlap so signal "unknown".
-    return OW_Unknown;
+    // Can reach here only if accesses are known not to overlap.
+    return OW_None;
  }

  bool isInvisibleToCallerAfterRet(const Value *V) {
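To illustrate the new result value (a minimal IR sketch, not a test from this commit): two stores to distinct allocas can never alias, so isOverwrite can now report OW_None for the pair instead of falling back to the catch-all OW_Unknown:

    define void @no_alias() {
      %a = alloca i32
      %b = alloca i32
      store i32 1, i32* %a   ; candidate dead store: distinct object from %b
      store i32 2, i32* %b   ; candidate killing store
      ret void
    }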
@@ -1354,6 +1373,15 @@ struct DSEState {
    Instruction *KillingI = KillingDef->getMemoryInst();
    LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");

+    // Only optimize the defining access of KillingDef when directly starting
+    // at its defining access. The defining access also must only access
+    // KillingLoc. At the moment we only support instructions with a single
+    // write location, so it should be sufficient to disable optimizations
+    // for instructions that also read from memory.
+    bool CanOptimize = OptimizeMemorySSA &&
+                       KillingDef->getDefiningAccess() == StartAccess &&
+                       !KillingI->mayReadFromMemory();
+
    // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
    Optional<MemoryLocation> CurrentLoc;
    for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
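As a sketch of the mayReadFromMemory() restriction (illustrative IR, not from this commit): a memcpy both writes %dst and reads %src, so when it is the killing instruction, CanOptimize stays false and its MemoryDef is never marked as optimized:

    declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

    define void @copy(i8* %dst, i8* %src) {
      ; Writes %dst but also reads %src, so mayReadFromMemory() is true.
      call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i1 false)
      ret void
    }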
@@ -1395,8 +1423,10 @@ struct DSEState {
      Instruction *CurrentI = CurrentDef->getMemoryInst();

      if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
-                     TLI))
+                     TLI)) {
+        CanOptimize = false;
        continue;
+      }

      // Before we try to remove anything, check for any extra throwing
      // instructions that block us from DSEing
@@ -1437,32 +1467,51 @@ struct DSEState {

      // If Current does not have an analyzable write location, skip it
      CurrentLoc = getLocForWriteEx(CurrentI);
-      if (!CurrentLoc)
+      if (!CurrentLoc) {
+        CanOptimize = false;
        continue;
+      }

      // AliasAnalysis does not account for loops. Limit elimination to
      // candidates for which we can guarantee they always store to the same
      // memory location and not located in different loops.
      if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
        LLVM_DEBUG(dbgs() << "  ... not guaranteed loop independent\n");
        WalkerStepLimit -= 1;
+        CanOptimize = false;
        continue;
      }

      if (IsMemTerm) {
        // If the killing def is a memory terminator (e.g. lifetime.end), check
        // the next candidate if the current Current does not write the same
        // underlying object as the terminator.
-        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
+        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
+          CanOptimize = false;
          continue;
+        }
      } else {
        int64_t KillingOffset = 0;
        int64_t DeadOffset = 0;
        auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
                              KillingOffset, DeadOffset);
+        if (CanOptimize) {
+          // CurrentDef is the earliest write clobber of KillingDef. Use it
+          // as the optimized access. Do not optimize if CurrentDef is
+          // already the defining access of KillingDef.
+          if (CurrentDef != KillingDef->getDefiningAccess() &&
+              (OR == OW_Complete || OR == OW_MaybePartial))
+            KillingDef->setOptimized(CurrentDef);
+
+          // Once a may-aliasing def is encountered, do not set an optimized
+          // access.
+          if (OR != OW_None)
+            CanOptimize = false;
+        }
+
        // If Current does not write to the same object as KillingDef, check
        // the next candidate.
-        if (OR == OW_Unknown)
+        if (OR == OW_Unknown || OR == OW_None)
          continue;
        else if (OR == OW_MaybePartial) {
          // If KillingDef only partially overwrites Current, check the next
@@ -1471,6 +1520,7 @@ struct DSEState {
          // which are less likely to be removable in the end.
          if (PartialLimit <= 1) {
            WalkerStepLimit -= 1;
+            LLVM_DEBUG(dbgs() << "   ... reached partial limit ... continue with next access\n");
            continue;
          }
          PartialLimit -= 1;
@@ -1941,6 +1991,63 @@ struct DSEState {

    return false;
  }
+
+  /// Eliminates writes to locations where the value that is being written
+  /// is already stored at the same location.
+  bool eliminateRedundantStoresOfExistingValues() {
+    bool MadeChange = false;
+    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
+                         "already existing value\n");
+    for (auto *Def : MemDefs) {
+      if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
+          !isRemovable(Def->getMemoryInst()))
+        continue;
+      MemoryDef *UpperDef;
+      // To conserve compile-time, we avoid walking to the next clobbering def.
+      // Instead, we just try to get the optimized access, if it exists. DSE
+      // will try to optimize defs during the earlier traversal.
+      if (Def->isOptimized())
+        UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
+      else
+        UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+      if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
+        continue;
+
+      Instruction *DefInst = Def->getMemoryInst();
+      Instruction *UpperInst = UpperDef->getMemoryInst();
+      auto IsRedundantStore = [this, DefInst,
+                               UpperInst](MemoryLocation UpperLoc) {
+        if (DefInst->isIdenticalTo(UpperInst))
+          return true;
+        if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
+          if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
+            auto MaybeDefLoc = getLocForWriteEx(DefInst);
+            if (!MaybeDefLoc)
+              return false;
+            int64_t InstWriteOffset = 0;
+            int64_t DepWriteOffset = 0;
+            auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
+                                  InstWriteOffset, DepWriteOffset);
+            Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
+            return StoredByte && StoredByte == MemSetI->getOperand(1) &&
+                   OR == OW_Complete;
+          }
+        }
+        return false;
+      };
+
+      auto MaybeUpperLoc = getLocForWriteEx(UpperInst);
+      if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) ||
+          isReadClobber(*MaybeUpperLoc, DefInst))
+        continue;
+      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *DefInst
+                        << '\n');
+      deleteDeadInstruction(DefInst);
+      NumRedundantStores++;
+      MadeChange = true;
+    }
+    return MadeChange;
+  }
};

static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
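As a sketch of the pattern the new helper removes (illustrative IR, not a test file from this commit): a store of a byte value that a dominating memset already wrote is redundant once isOverwrite reports that the memset completely covers the store's location:

    declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)

    define void @redundant(i8* %p) {
      call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 16, i1 false)
      %gep = getelementptr i8, i8* %p, i64 4
      ; isBytewiseValue of the stored value matches the memset byte and the
      ; memset fully covers this location (OW_Complete), so DSE can drop it.
      store i8 0, i8* %gep
      ret void
    }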
@@ -2106,6 +2213,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
  for (auto &KV : State.IOLs)
    MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);

+  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
  return MadeChange;
}