Skip to content

Commit 1f11450

Browse files
committed
[DSE] Use correct memory location for read clobber check
MSSA DSE starts at a killing store, finds an earlier store and then checks that the earlier store is not read along any paths (without being killed first). However, it uses the memory location of the killing store for that, not the earlier store that we're attempting to eliminate.

This has a number of problems:

* Mismatches between what BasicAA considers aliasing and what DSE considers an overwrite (even though both are correct in isolation) can result in miscompiles. This is PR48279, which D92045 tries to fix in a different way. The problem is that we're using a location from a store that is potentially not executed and thus may be UB, in which case analysis results can be arbitrary.

* Metadata on the killing store may be used to determine aliasing, but there is no guarantee that the metadata is valid, as the specific killing store may not be executed. Using the metadata on the earlier store is valid (it is the store we're removing, so on any execution where its removal may be observed, it must be executed).

* The location is imprecise. For full overwrites the killing store will always have a location that is larger than or equal to the earlier access location, so it's beneficial to use the earlier access location. This is not the case for partial overwrites, in which case either location might be smaller. There is some room for improvement here.

Using the earlier access location means that we can no longer cache which accesses are read for a given killing store, as we may be querying different locations. However, it turns out that simply dropping the cache has no notable impact on compile-time.

Differential Revision: https://reviews.llvm.org/D93523
1 parent 1c3a667 commit 1f11450

File tree

5 files changed

+28
-67
lines changed

5 files changed

+28
-67
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 21 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1634,18 +1634,6 @@ struct DSEState {
16341634
/// basic block.
16351635
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
16361636

1637-
struct CheckCache {
1638-
SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
1639-
SmallPtrSet<MemoryAccess *, 16> KnownReads;
1640-
1641-
bool isKnownNoRead(MemoryAccess *A) const {
1642-
return KnownNoReads.find(A) != KnownNoReads.end();
1643-
}
1644-
bool isKnownRead(MemoryAccess *A) const {
1645-
return KnownReads.find(A) != KnownReads.end();
1646-
}
1647-
};
1648-
16491637
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
16501638
PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
16511639
: F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
@@ -1940,9 +1928,8 @@ struct DSEState {
19401928
Optional<MemoryAccess *>
19411929
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
19421930
const MemoryLocation &DefLoc, const Value *DefUO,
1943-
CheckCache &Cache, unsigned &ScanLimit,
1944-
unsigned &WalkerStepLimit, bool IsMemTerm,
1945-
unsigned &PartialLimit) {
1931+
unsigned &ScanLimit, unsigned &WalkerStepLimit,
1932+
bool IsMemTerm, unsigned &PartialLimit) {
19461933
if (ScanLimit == 0 || WalkerStepLimit == 0) {
19471934
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
19481935
return None;
@@ -1954,6 +1941,7 @@ struct DSEState {
19541941
LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
19551942

19561943
// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1944+
Optional<MemoryLocation> CurrentLoc;
19571945
do {
19581946
StepAgain = false;
19591947
LLVM_DEBUG({
@@ -2017,12 +2005,8 @@ struct DSEState {
20172005
// clobber, bail out, as the path is not profitable. We skip this check
20182006
// for intrinsic calls, because the code knows how to handle memcpy
20192007
// intrinsics.
2020-
if (!isa<IntrinsicInst>(CurrentI) &&
2021-
(Cache.KnownReads.contains(Current) ||
2022-
isReadClobber(DefLoc, CurrentI))) {
2023-
Cache.KnownReads.insert(Current);
2008+
if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
20242009
return None;
2025-
}
20262010

20272011
// Quick check if there are direct uses that are read-clobbers.
20282012
if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
@@ -2031,7 +2015,6 @@ struct DSEState {
20312015
isReadClobber(DefLoc, UseOrDef->getMemoryInst());
20322016
return false;
20332017
})) {
2034-
Cache.KnownReads.insert(Current);
20352018
LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
20362019
return None;
20372020
}
@@ -2045,13 +2028,24 @@ struct DSEState {
20452028
}
20462029

20472030
// If Current does not have an analyzable write location, skip it
2048-
auto CurrentLoc = getLocForWriteEx(CurrentI);
2031+
CurrentLoc = getLocForWriteEx(CurrentI);
20492032
if (!CurrentLoc) {
20502033
StepAgain = true;
20512034
Current = CurrentDef->getDefiningAccess();
20522035
continue;
20532036
}
20542037

2038+
// AliasAnalysis does not account for loops. Limit elimination to
2039+
// candidates for which we can guarantee they always store to the same
2040+
// memory location and not multiple locations in a loop.
2041+
if (Current->getBlock() != KillingDef->getBlock() &&
2042+
!IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
2043+
StepAgain = true;
2044+
Current = CurrentDef->getDefiningAccess();
2045+
WalkerStepLimit -= 1;
2046+
continue;
2047+
}
2048+
20552049
if (IsMemTerm) {
20562050
// If the killing def is a memory terminator (e.g. lifetime.end), check
20572051
// the next candidate if the current Current does not write the same
@@ -2062,17 +2056,6 @@ struct DSEState {
20622056
}
20632057
continue;
20642058
} else {
2065-
// AliasAnalysis does not account for loops. Limit elimination to
2066-
// candidates for which we can guarantee they always store to the same
2067-
// memory location and not multiple locations in a loop.
2068-
if (Current->getBlock() != KillingDef->getBlock() &&
2069-
!IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
2070-
StepAgain = true;
2071-
Current = CurrentDef->getDefiningAccess();
2072-
WalkerStepLimit -= 1;
2073-
continue;
2074-
}
2075-
20762059
int64_t InstWriteOffset, DepWriteOffset;
20772060
auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
20782061
DepWriteOffset, InstWriteOffset, BatchAA, &F);
@@ -2133,18 +2116,6 @@ struct DSEState {
21332116
}
21342117
--ScanLimit;
21352118
NumDomMemDefChecks++;
2136-
2137-
// Check if we already visited this access.
2138-
if (Cache.isKnownNoRead(UseAccess)) {
2139-
LLVM_DEBUG(dbgs() << " ... skip, discovered that " << *UseAccess
2140-
<< " is safe earlier.\n");
2141-
continue;
2142-
}
2143-
if (Cache.isKnownRead(UseAccess)) {
2144-
LLVM_DEBUG(dbgs() << " ... bail out, discovered that " << *UseAccess
2145-
<< " has a read-clobber earlier.\n");
2146-
return None;
2147-
}
21482119
KnownNoReads.insert(UseAccess);
21492120

21502121
if (isa<MemoryPhi>(UseAccess)) {
@@ -2172,7 +2143,7 @@ struct DSEState {
21722143

21732144
// A memory terminator kills all preceeding MemoryDefs and all succeeding
21742145
// MemoryAccesses. We do not have to check it's users.
2175-
if (isMemTerminator(DefLoc, KillingI, UseInst)) {
2146+
if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
21762147
LLVM_DEBUG(
21772148
dbgs()
21782149
<< " ... skipping, memterminator invalidates following accesses\n");
@@ -2187,19 +2158,13 @@ struct DSEState {
21872158

21882159
if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
21892160
LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
2190-
Cache.KnownReads.insert(UseAccess);
2191-
Cache.KnownReads.insert(StartAccess);
2192-
Cache.KnownReads.insert(EarlierAccess);
21932161
return None;
21942162
}
21952163

21962164
// Uses which may read the original MemoryDef mean we cannot eliminate the
21972165
// original MD. Stop walk.
2198-
if (isReadClobber(DefLoc, UseInst)) {
2166+
if (isReadClobber(*CurrentLoc, UseInst)) {
21992167
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
2200-
Cache.KnownReads.insert(UseAccess);
2201-
Cache.KnownReads.insert(StartAccess);
2202-
Cache.KnownReads.insert(EarlierAccess);
22032168
return None;
22042169
}
22052170

@@ -2223,7 +2188,7 @@ struct DSEState {
22232188
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
22242189
// stores [0,1]
22252190
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
2226-
if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) {
2191+
if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
22272192
if (!isInvisibleToCallerAfterRet(DefUO) &&
22282193
UseAccess != EarlierAccess) {
22292194
BasicBlock *MaybeKillingBlock = UseInst->getParent();
@@ -2311,7 +2276,6 @@ struct DSEState {
23112276

23122277
// No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
23132278
// potentially dead.
2314-
Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end());
23152279
return {EarlierAccess};
23162280
}
23172281

@@ -2550,16 +2514,15 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
25502514

25512515
bool Shortend = false;
25522516
bool IsMemTerm = State.isMemTerminatorInst(SI);
2553-
DSEState::CheckCache Cache;
25542517
// Check if MemoryAccesses in the worklist are killed by KillingDef.
25552518
for (unsigned I = 0; I < ToCheck.size(); I++) {
25562519
Current = ToCheck[I];
25572520
if (State.SkipStores.count(Current))
25582521
continue;
25592522

25602523
Optional<MemoryAccess *> Next = State.getDomMemoryDef(
2561-
KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit,
2562-
WalkerStepLimit, IsMemTerm, PartialLimit);
2524+
KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
2525+
IsMemTerm, PartialLimit);
25632526

25642527
if (!Next) {
25652528
LLVM_DEBUG(dbgs() << " finished walk\n");

llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,13 @@ bb3:
4141
}
4242

4343
; Post-dominating store.
44+
; TODO: The memset can be shortened.
4445
define void @accessible_after_return_2(i32* noalias %P, i1 %c) {
4546
; CHECK-LABEL: @accessible_after_return_2(
4647
; CHECK-NEXT: entry:
4748
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
4849
; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
49-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
50-
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
50+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
5151
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
5252
; CHECK: bb1:
5353
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1

llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ define i32 @test_out_of_bounds_store_nonlocal(i1 %c) {
3636
; CHECK-NEXT: [[D:%.*]] = alloca [1 x i32], align 4
3737
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
3838
; CHECK: for.body:
39+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[D]], i64 0, i64 0
40+
; CHECK-NEXT: store i32 10, i32* [[ARRAYIDX]], align 4
3941
; CHECK-NEXT: br label [[FOR_INC:%.*]]
4042
; CHECK: for.inc:
4143
; CHECK-NEXT: br i1 [[C:%.*]], label [[FOR_BODY_1:%.*]], label [[FOR_END:%.*]]

llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,11 @@ end:
6767
ret void
6868
}
6969

70-
; TODO: The store to %a0 is dead, because only %a1 is read later.
70+
; The store to %a0 is dead, because only %a1 is read later.
7171
define void @test3(i1 %c) {
7272
; CHECK-LABEL: @test3(
7373
; CHECK-NEXT: [[A:%.*]] = alloca [2 x i8], align 1
74-
; CHECK-NEXT: [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
7574
; CHECK-NEXT: [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
76-
; CHECK-NEXT: store i8 1, i8* [[A0]], align 1
7775
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
7876
; CHECK: if:
7977
; CHECK-NEXT: store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1
@@ -102,10 +100,8 @@ else:
102100
define void @test4(i1 %c) {
103101
; CHECK-LABEL: @test4(
104102
; CHECK-NEXT: [[A:%.*]] = alloca [2 x i8], align 1
105-
; CHECK-NEXT: [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
106103
; CHECK-NEXT: [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
107104
; CHECK-NEXT: store i8 1, i8* [[A1]], align 1
108-
; CHECK-NEXT: store i8 1, i8* [[A0]], align 1
109105
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
110106
; CHECK: if:
111107
; CHECK-NEXT: store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1

llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
; Assume that %p1 != %p2 if and only if %c is true. In that case the noalias
55
; metadata is correct, but the first store cannot be eliminated, as it may be
66
; read-clobbered by the load.
7-
; TODO The store is incorrectly eliminated.
87
define void @test(i1 %c, i8* %p1, i8* %p2) {
98
; CHECK-LABEL: @test(
9+
; CHECK-NEXT: store i8 0, i8* [[P1:%.*]], align 1
1010
; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[P2:%.*]], align 1, !alias.scope !0
1111
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
1212
; CHECK: if:
13-
; CHECK-NEXT: store i8 1, i8* [[P1:%.*]], align 1, !noalias !0
13+
; CHECK-NEXT: store i8 1, i8* [[P1]], align 1, !noalias !0
1414
; CHECK-NEXT: ret void
1515
; CHECK: else:
1616
; CHECK-NEXT: store i8 2, i8* [[P1]], align 1

0 commit comments

Comments
 (0)