Skip to content

Commit db2f02f

Browse files
nikic authored and fhahn committed
[MemDep] Use EarliestEscapeInfo (llvm#69727)
Use BatchAA with EarliestEscapeInfo instead of callCapturesBefore() in MemDepAnalysis. The advantage of this is that it will also take not-captured-before information into account for non-calls (see test_store_before_capture for a representative example), and that this is a cached analysis. The disadvantage is that EII is slightly less precise than full CapturedBefore analysis. In practice the impact is positive, with gvn.NumGVNLoad going from 22022 to 22808 on test-suite. The impact on compile time is also positive, mainly in the ThinLTO configuration. (cherry-picked from 2ad9fde)
1 parent 9592a9d commit db2f02f

File tree

4 files changed: +91 −13 lines changed

llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/PointerIntPair.h"
1919
#include "llvm/ADT/PointerSumType.h"
2020
#include "llvm/ADT/SmallPtrSet.h"
21+
#include "llvm/Analysis/AliasAnalysis.h"
2122
#include "llvm/Analysis/MemoryLocation.h"
2223
#include "llvm/IR/PassManager.h"
2324
#include "llvm/IR/PredIteratorCache.h"
@@ -27,7 +28,6 @@
2728

2829
namespace llvm {
2930

30-
class AAResults;
3131
class AssumptionCache;
3232
class BatchAAResults;
3333
class DominatorTree;
@@ -356,6 +356,7 @@ class MemoryDependenceResults {
356356
const TargetLibraryInfo &TLI;
357357
DominatorTree &DT;
358358
PredIteratorCache PredCache;
359+
EarliestEscapeInfo EII;
359360

360361
unsigned DefaultBlockScanLimit;
361362

@@ -367,7 +368,7 @@ class MemoryDependenceResults {
367368
MemoryDependenceResults(AAResults &AA, AssumptionCache &AC,
368369
const TargetLibraryInfo &TLI, DominatorTree &DT,
369370
unsigned DefaultBlockScanLimit)
370-
: AA(AA), AC(AC), TLI(TLI), DT(DT),
371+
: AA(AA), AC(AC), TLI(TLI), DT(DT), EII(DT),
371372
DefaultBlockScanLimit(DefaultBlockScanLimit) {}
372373

373374
/// Handle invalidation in the new PM.

llvm/lib/Analysis/MemoryDependenceAnalysis.cpp

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
268268
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
269269
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
270270
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
271-
BatchAAResults BatchAA(AA);
271+
BatchAAResults BatchAA(AA, &EII);
272272
return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
273273
BatchAA);
274274
}
@@ -610,11 +610,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
610610
continue;
611611

612612
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
613-
ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
614-
// If necessary, perform additional analysis.
615-
if (isModAndRefSet(MR))
616-
MR = BatchAA.callCapturesBefore(Inst, MemLoc, &DT);
617-
switch (MR) {
613+
switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
618614
case ModRefInfo::NoModRef:
619615
// If the call has no effect on the queried pointer, just ignore it.
620616
continue;
@@ -1192,7 +1188,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
11921188
bool GotWorklistLimit = false;
11931189
LLVM_DEBUG(AssertSorted(*Cache));
11941190

1195-
BatchAAResults BatchAA(AA);
1191+
BatchAAResults BatchAA(AA, &EII);
11961192
while (!Worklist.empty()) {
11971193
BasicBlock *BB = Worklist.pop_back_val();
11981194

@@ -1504,6 +1500,8 @@ void MemoryDependenceResults::invalidateCachedPredecessors() {
15041500
}
15051501

15061502
void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1503+
EII.removeInstruction(RemInst);
1504+
15071505
// Walk through the Non-local dependencies, removing this one as the value
15081506
// for any cached queries.
15091507
NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);

llvm/test/CodeGen/BPF/CORE/no-narrow-load.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -65,10 +65,10 @@ lor.end: ; preds = %lor.end.critedge, %
6565
ret void, !dbg !53
6666
}
6767

68-
; CHECK: r[[LOAD1:[0-9]+]] = *(u32 *)(r{{[0-9]+}} + 4)
69-
; CHECK: r[[LOAD1]] &= 65536
70-
; CHECK: r[[LOAD2:[0-9]+]] = *(u32 *)(r{{[0-9]+}} + 4)
71-
; CHECK: r[[LOAD2]] &= 32768
68+
; CHECK: r[[LOAD:[0-9]+]] = *(u32 *)(r{{[0-9]+}} + 4)
69+
; CHECK: r[[COPY:[0-9]+]] = r[[LOAD]]
70+
; CHECK: r[[COPY]] &= 65536
71+
; CHECK: r[[LOAD]] &= 32768
7272

7373
; Function Attrs: nounwind readnone speculatable willreturn
7474
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
Lines changed: 79 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,79 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt -S -passes=gvn < %s | FileCheck %s
3+
4+
declare void @capture(ptr)
5+
declare void @some_call()
6+
7+
define i32 @test_call_before_capture(ptr %p) {
8+
; CHECK-LABEL: define i32 @test_call_before_capture(
9+
; CHECK-SAME: ptr [[P:%.*]]) {
10+
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
11+
; CHECK-NEXT: store i32 123, ptr [[A]], align 4
12+
; CHECK-NEXT: call void @some_call()
13+
; CHECK-NEXT: call void @capture(ptr [[A]])
14+
; CHECK-NEXT: ret i32 123
15+
;
16+
%a = alloca i32
17+
store i32 123, ptr %a
18+
call void @some_call()
19+
%v = load i32, ptr %a
20+
call void @capture(ptr %a)
21+
ret i32 %v
22+
}
23+
24+
define i32 @test_call_after_capture(ptr %p) {
25+
; CHECK-LABEL: define i32 @test_call_after_capture(
26+
; CHECK-SAME: ptr [[P:%.*]]) {
27+
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
28+
; CHECK-NEXT: store i32 123, ptr [[A]], align 4
29+
; CHECK-NEXT: call void @capture(ptr [[A]])
30+
; CHECK-NEXT: call void @some_call()
31+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4
32+
; CHECK-NEXT: ret i32 [[V]]
33+
;
34+
%a = alloca i32
35+
store i32 123, ptr %a
36+
call void @capture(ptr %a)
37+
call void @some_call()
38+
%v = load i32, ptr %a
39+
ret i32 %v
40+
}
41+
42+
define i32 @test_store_before_capture(ptr %p) {
43+
; CHECK-LABEL: define i32 @test_store_before_capture(
44+
; CHECK-SAME: ptr [[P:%.*]]) {
45+
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
46+
; CHECK-NEXT: store i32 123, ptr [[A]], align 4
47+
; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P]], align 8
48+
; CHECK-NEXT: store i32 42, ptr [[P2]], align 4
49+
; CHECK-NEXT: call void @capture(ptr [[A]])
50+
; CHECK-NEXT: ret i32 123
51+
;
52+
%a = alloca i32
53+
store i32 123, ptr %a
54+
%p2 = load ptr, ptr %p
55+
store i32 42, ptr %p2
56+
%v = load i32, ptr %a
57+
call void @capture(ptr %a)
58+
ret i32 %v
59+
}
60+
61+
define i32 @test_store_after_capture(ptr %p) {
62+
; CHECK-LABEL: define i32 @test_store_after_capture(
63+
; CHECK-SAME: ptr [[P:%.*]]) {
64+
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
65+
; CHECK-NEXT: store i32 123, ptr [[A]], align 4
66+
; CHECK-NEXT: call void @capture(ptr [[A]])
67+
; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P]], align 8
68+
; CHECK-NEXT: store i32 42, ptr [[P2]], align 4
69+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4
70+
; CHECK-NEXT: ret i32 [[V]]
71+
;
72+
%a = alloca i32
73+
store i32 123, ptr %a
74+
call void @capture(ptr %a)
75+
%p2 = load ptr, ptr %p
76+
store i32 42, ptr %p2
77+
%v = load i32, ptr %a
78+
ret i32 %v
79+
}

0 commit comments

Comments
 (0)