Skip to content

Commit c9ad356

Browse files
committed
[DSE] Use optimized access if available for redundant store elimination.
Using the optimized access enables additional optimizations in cases where the defining access is a non-aliasing store.

Alternatively, we could also walk upwards and skip non-aliasing defs here, but my experiments so far showed that this would noticeably increase compile time for little extra gain compared to just using the optimized access.

Improvements of dse.NumRedundantStores on MultiSource/CINT2006/CFP2006 on X86 with -O3:

| Test | Before | After | Change |
|---|---|---|---|
| test-suite...-typeset/consumer-typeset.test | 1.00 | 76.00 | 7500.0% |
| test-suite.../Benchmarks/Bullet/bullet.test | 3.00 | 12.00 | 300.0% |
| test-suite...006/453.povray/453.povray.test | 3.00 | 6.00 | 100.0% |
| test-suite...telecomm-gsm/telecomm-gsm.test | 1.00 | 2.00 | 100.0% |
| test-suite...ediabench/gsm/toast/toast.test | 1.00 | 2.00 | 100.0% |
| test-suite...marks/7zip/7zip-benchmark.test | 1.00 | 2.00 | 100.0% |
| test-suite...ications/JM/lencod/lencod.test | 7.00 | 10.00 | 42.9% |
| test-suite...6/464.h264ref/464.h264ref.test | 6.00 | 8.00 | 33.3% |
| test-suite...ications/JM/ldecod/ldecod.test | 6.00 | 7.00 | 16.7% |
| test-suite...006/447.dealII/447.dealII.test | 33.00 | 33.00 | 0.0% |
| test-suite...6/471.omnetpp/471.omnetpp.test | NaN | 1.00 | nan% |
| test-suite...006/450.soplex/450.soplex.test | NaN | 2.00 | nan% |
| test-suite.../CINT2006/403.gcc/403.gcc.test | NaN | 7.00 | nan% |
| test-suite...lications/ClamAV/clamscan.test | NaN | 1.00 | nan% |
| test-suite...CI_Purple/SMG2000/smg2000.test | NaN | 3.00 | nan% |

Follow-up to D111727.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D112315
1 parent 316e627 commit c9ad356

File tree

2 files changed

+48
-22
lines changed

2 files changed

+48
-22
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1928,7 +1928,14 @@ struct DSEState {
19281928
if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
19291929
!isRemovable(Def->getMemoryInst()))
19301930
continue;
1931-
auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
1931+
MemoryDef *UpperDef;
1932+
// To conserve compile-time, we avoid walking to the next clobbering def.
1933+
// Instead, we just try to get the optimized access, if it exists. DSE
1934+
// will try to optimize defs during the earlier traversal.
1935+
if (Def->isOptimized())
1936+
UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
1937+
else
1938+
UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
19321939
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
19331940
continue;
19341941

llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -basic-aa -dse -S %s | FileCheck %s
2+
; RUN: opt -basic-aa -dse -dse-optimize-memoryssa=false -S %s | FileCheck --check-prefixes=CHECK,UNOPT %s
3+
; RUN: opt -basic-aa -dse -dse-optimize-memoryssa -S %s | FileCheck --check-prefixes=CHECK,OPT %s
4+
; RUN: opt -basic-aa -dse -S %s | FileCheck --check-prefixes=CHECK,UNOPT %s
35

46
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
57

@@ -315,17 +317,28 @@ bb3:
315317

316318
; The store in bb3 can be eliminated, because the store in bb1 cannot alias it.
317319
define void @test10(i32* noalias %P, i32* %Q, i1 %c) {
318-
; CHECK-LABEL: @test10(
319-
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
320-
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
321-
; CHECK: bb1:
322-
; CHECK-NEXT: store i32 10, i32* [[Q:%.*]], align 4
323-
; CHECK-NEXT: br label [[BB3:%.*]]
324-
; CHECK: bb2:
325-
; CHECK-NEXT: ret void
326-
; CHECK: bb3:
327-
; CHECK-NEXT: store i32 0, i32* [[P]], align 4
328-
; CHECK-NEXT: ret void
320+
; UNOPT-LABEL: @test10(
321+
; UNOPT-NEXT: store i32 0, i32* [[P:%.*]], align 4
322+
; UNOPT-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
323+
; UNOPT: bb1:
324+
; UNOPT-NEXT: store i32 10, i32* [[Q:%.*]], align 4
325+
; UNOPT-NEXT: br label [[BB3:%.*]]
326+
; UNOPT: bb2:
327+
; UNOPT-NEXT: ret void
328+
; UNOPT: bb3:
329+
; UNOPT-NEXT: store i32 0, i32* [[P]], align 4
330+
; UNOPT-NEXT: ret void
331+
;
332+
; OPT-LABEL: @test10(
333+
; OPT-NEXT: store i32 0, i32* [[P:%.*]], align 4
334+
; OPT-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
335+
; OPT: bb1:
336+
; OPT-NEXT: store i32 10, i32* [[Q:%.*]], align 4
337+
; OPT-NEXT: br label [[BB3:%.*]]
338+
; OPT: bb2:
339+
; OPT-NEXT: ret void
340+
; OPT: bb3:
341+
; OPT-NEXT: ret void
329342
;
330343
store i32 0, i32* %P
331344
br i1 %c, label %bb1, label %bb2
@@ -412,13 +425,19 @@ define void @test12_memset_simple(i8* %ptr) {
412425
}
413426

414427
define void @test12_memset_other_store_in_between(i8* %ptr) {
415-
; CHECK-LABEL: @test12_memset_other_store_in_between(
416-
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
417-
; CHECK-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
418-
; CHECK-NEXT: store i8 8, i8* [[PTR_4]], align 1
419-
; CHECK-NEXT: [[PTR_5:%.*]] = getelementptr i8, i8* [[PTR]], i64 5
420-
; CHECK-NEXT: store i8 0, i8* [[PTR_5]], align 1
421-
; CHECK-NEXT: ret void
428+
; UNOPT-LABEL: @test12_memset_other_store_in_between(
429+
; UNOPT-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
430+
; UNOPT-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
431+
; UNOPT-NEXT: store i8 8, i8* [[PTR_4]], align 1
432+
; UNOPT-NEXT: [[PTR_5:%.*]] = getelementptr i8, i8* [[PTR]], i64 5
433+
; UNOPT-NEXT: store i8 0, i8* [[PTR_5]], align 1
434+
; UNOPT-NEXT: ret void
435+
;
436+
; OPT-LABEL: @test12_memset_other_store_in_between(
437+
; OPT-NEXT: call void @llvm.memset.p0i8.i64(i8* [[PTR:%.*]], i8 0, i64 10, i1 false)
438+
; OPT-NEXT: [[PTR_4:%.*]] = getelementptr i8, i8* [[PTR]], i64 4
439+
; OPT-NEXT: store i8 8, i8* [[PTR_4]], align 1
440+
; OPT-NEXT: ret void
422441
;
423442
call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 10, i1 false)
424443
%ptr.4 = getelementptr i8, i8* %ptr, i64 4
@@ -525,8 +544,8 @@ declare i8* @strcat(i8*, i8*) nounwind argmemonly
525544

526545
define void @test14_strcat(i8* noalias %P, i8* noalias %Q) {
527546
; CHECK-LABEL: @test14_strcat(
528-
; CHECK-NEXT: call i8* @strcat(i8* [[P:%.*]], i8* [[Q:%.*]])
529-
; CHECK-NEXT: call i8* @strcat(i8* [[P]], i8* [[Q]])
547+
; CHECK-NEXT: [[CALL1:%.*]] = call i8* @strcat(i8* [[P:%.*]], i8* [[Q:%.*]])
548+
; CHECK-NEXT: [[CALL2:%.*]] = call i8* @strcat(i8* [[P]], i8* [[Q]])
530549
; CHECK-NEXT: ret void
531550
;
532551
%call1 = call i8* @strcat(i8* %P, i8* %Q)

0 commit comments

Comments
 (0)