Skip to content

Commit 22ce5eb

Browse files
committed
Do not widen load for different variable in GVN.
Summary: Widening load in GVN is too early because it will block other optimizations like PRE, LICM. https://llvm.org/bugs/show_bug.cgi?id=29110 The SPECCPU2006 benchmark impact of this patch: Reference: o2_nopatch (1): o2_patched Benchmark Base:Reference (1) ------------------------------------------------------- spec/2006/fp/C++/444.namd 25.2 -0.08% spec/2006/fp/C++/447.dealII 45.92 +1.05% spec/2006/fp/C++/450.soplex 41.7 -0.26% spec/2006/fp/C++/453.povray 35.65 +1.68% spec/2006/fp/C/433.milc 23.79 +0.42% spec/2006/fp/C/470.lbm 41.88 -1.12% spec/2006/fp/C/482.sphinx3 47.94 +1.67% spec/2006/int/C++/471.omnetpp 22.46 -0.36% spec/2006/int/C++/473.astar 21.19 +0.24% spec/2006/int/C++/483.xalancbmk 36.09 -0.11% spec/2006/int/C/400.perlbench 33.28 +1.35% spec/2006/int/C/401.bzip2 22.76 -0.04% spec/2006/int/C/403.gcc 32.36 +0.12% spec/2006/int/C/429.mcf 41.04 -0.41% spec/2006/int/C/445.gobmk 26.94 +0.04% spec/2006/int/C/456.hmmer 24.5 -0.20% spec/2006/int/C/458.sjeng 28 -0.46% spec/2006/int/C/462.libquantum 55.25 +0.27% spec/2006/int/C/464.h264ref 45.87 +0.72% geometric mean +0.23% For most benchmarks, it's a wash, but we do see stable improvements on some benchmarks, e.g. 447,453,482,400. Reviewers: davidxl, hfinkel, dberlin, sanjoy, reames Subscribers: gberry, junbuml Differential Revision: https://reviews.llvm.org/D24096 llvm-svn: 281074
1 parent a5edf65 commit 22ce5eb

File tree

7 files changed

+23
-54
lines changed

7 files changed

+23
-54
lines changed

llvm/lib/Analysis/MemoryDependenceAnalysis.cpp

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -234,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
234234
return MemDepResult::getNonFuncLocal();
235235
}
236236

237-
/// Return true if LI is a load that would fully overlap MemLoc if done as
238-
/// a wider legal integer load.
239-
///
240-
/// MemLocBase, MemLocOffset are lazily computed here the first time the
241-
/// base/offs of memloc is needed.
242-
static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
243-
const Value *&MemLocBase,
244-
int64_t &MemLocOffs,
245-
const LoadInst *LI) {
246-
const DataLayout &DL = LI->getModule()->getDataLayout();
247-
248-
// If we haven't already computed the base/offset of MemLoc, do so now.
249-
if (!MemLocBase)
250-
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
251-
252-
unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
253-
MemLocBase, MemLocOffs, MemLoc.Size, LI);
254-
return Size != 0;
255-
}
256-
257237
unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
258238
const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
259239
const LoadInst *LI) {
@@ -410,9 +390,6 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
410390
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
411391
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
412392
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
413-
414-
const Value *MemLocBase = nullptr;
415-
int64_t MemLocOffset = 0;
416393
bool isInvariantLoad = false;
417394

418395
if (!Limit) {
@@ -550,21 +527,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
550527
AliasResult R = AA.alias(LoadLoc, MemLoc);
551528

552529
if (isLoad) {
553-
if (R == NoAlias) {
554-
// If this is an over-aligned integer load (for example,
555-
// "load i8* %P, align 4") see if it would obviously overlap with the
556-
// queried location if widened to a larger load (e.g. if the queried
557-
// location is 1 byte at P+1). If so, return it as a load/load
558-
// clobber result, allowing the client to decide to widen the load if
559-
// it wants to.
560-
if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
561-
if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
562-
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
563-
MemLocOffset, LI))
564-
return MemDepResult::getClobber(Inst);
565-
}
530+
if (R == NoAlias)
566531
continue;
567-
}
568532

569533
// Must aliased loads are defs of each other.
570534
if (R == MustAlias)

llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ define void @end_test_widening_bad() {
3434
ret void
3535
}
3636

37-
;; Accessing bytes 4 and 5. Ok to widen to i16.
37+
;; Accessing bytes 4 and 5. No widen to i16.
3838

3939
define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone sanitize_address {
4040
entry:
@@ -45,7 +45,8 @@ entry:
4545
%add = add nsw i32 %conv, %conv2
4646
ret i32 %add
4747
; CHECK: @test_widening_ok
48-
; CHECK: __asan_report_load2
48+
; CHECK: __asan_report_load1
49+
; CHECK: __asan_report_load1
4950
; CHECK-NOT: __asan_report
5051
; CHECK: end_test_widening_ok
5152
}

llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,16 @@ for.end:
5151
}
5252

5353
; %1 is partially redundant if %0 can be widened to a 64-bit load.
54+
; But we should not widen %0 to 64-bit load.
5455

5556
; CHECK-LABEL: define i32 @overaligned_load
5657
; CHECK: if.then:
57-
; CHECK: %0 = load i64
58-
; CHECK: [[LSHR:%[0-9]+]] = lshr i64 %0, 32, !dbg [[LSHR_LOC:![0-9]+]]
59-
; CHECK: trunc i64 [[LSHR]] to i32
58+
; CHECK-NOT: %0 = load i64
59+
; CHECK-NOT: [[LSHR:%[0-9]+]] = lshr i64 %0, 32, !dbg [[LSHR_LOC:![0-9]+]]
60+
; CHECK-NOT: trunc i64 [[LSHR]] to i32
6061
; CHECK: if.end:
61-
; CHECK-NOT: %1 = load i32, i32*
62-
; CHECK: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
62+
; CHECK: %1 = load i32, i32*
63+
; CHECK-NOT: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
6364

6465
define i32 @overaligned_load(i32 %a, i32* nocapture %b) !dbg !13 {
6566
entry:

llvm/test/Transforms/GVN/PRE/rle.ll

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,7 @@ if.end:
624624

625625
;;===----------------------------------------------------------------------===;;
626626
;; Load Widening
627+
;; We explicitly choose NOT to widen. And are testing to make sure we don't.
627628
;;===----------------------------------------------------------------------===;;
628629

629630
%widening1 = type { i32, i8, i8, i8, i8 }
@@ -640,7 +641,8 @@ entry:
640641
ret i32 %add
641642
; CHECK-LABEL: @test_widening1(
642643
; CHECK-NOT: load
643-
; CHECK: load i16, i16*
644+
; CHECK: load i8, i8*
645+
; CHECK: load i8, i8*
644646
; CHECK-NOT: load
645647
; CHECK: ret i32
646648
}
@@ -664,7 +666,10 @@ entry:
664666
ret i32 %add3
665667
; CHECK-LABEL: @test_widening2(
666668
; CHECK-NOT: load
667-
; CHECK: load i32, i32*
669+
; CHECK: load i8, i8*
670+
; CHECK: load i8, i8*
671+
; CHECK: load i8, i8*
672+
; CHECK: load i8, i8*
668673
; CHECK-NOT: load
669674
; CHECK: ret i32
670675
}

llvm/test/Transforms/GVN/big-endian.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ target triple = "powerpc64-unknown-linux-gnu"
77
;; loads reusing a load value.
88
define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) {
99
; CHECK-LABEL: @test1
10-
; CHECK: [[V1:%.*]] = load i16, i16* %{{.*}}
11-
; CHECK: [[V2:%.*]] = lshr i16 [[V1]], 8
12-
; CHECK: trunc i16 [[V2]] to i1
10+
; CHECK-NOT: [[V1:%.*]] = load i16, i16* %{{.*}}
11+
; CHECK-NOT: [[V2:%.*]] = lshr i16 [[V1]], 8
12+
; CHECK-NOT: trunc i16 [[V2]] to i1
1313

1414
%valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0
1515
%valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8

llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ define i32 @TestNoAsan() {
2525
}
2626

2727
; CHECK-LABEL: @TestNoAsan
28-
; CHECK: %[[LOAD:[^ ]+]] = load i32
29-
; CHECK: {{.*}} = ashr i32 %[[LOAD]]
30-
; CHECK-NOT: {{.*}} = phi
28+
; CHECK: ret i32 0
3129

3230
define i32 @TestAsan() sanitize_address {
3331
%1 = tail call noalias i8* @_Znam(i64 2)

llvm/test/Transforms/GVN/pr25440.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ bb0: ; preds = %land.lhs.true, %entry
1919
%x.tr = phi %struct.a* [ %x, %entry ], [ null, %land.lhs.true ]
2020
%code1 = getelementptr inbounds %struct.a, %struct.a* %x.tr, i32 0, i32 0
2121
%0 = load i16, i16* %code1, align 4
22-
; CHECK: load i32, i32*
22+
; CHECK: load i16, i16*
2323
%conv = zext i16 %0 to i32
2424
switch i32 %conv, label %if.end.50 [
2525
i32 43, label %cleanup
@@ -38,7 +38,7 @@ if.then.26: ; preds = %if.then.5
3838

3939
cond.false: ; preds = %if.then.26
4040
; CHECK: cond.false:
41-
; CHECK-NOT: load
41+
; CHECK: load i16
4242
%mode = getelementptr inbounds %struct.a, %struct.a* %x.tr.lcssa163, i32 0, i32 1
4343
%bf.load = load i16, i16* %mode, align 2
4444
%bf.shl = shl i16 %bf.load, 8

0 commit comments

Comments
 (0)