Skip to content

Commit 8683707

Browse files
committed
[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.4 [skip ci]
1 parent 4bc3b35 commit 8683707

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,15 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
209209
cl::Hidden,
210210
cl::desc("Enable the LoopFlatten Pass"));
211211

212+
// Experimentally allow loop header duplication. This should allow for better
213+
// optimization at Oz, since loop-idiom recognition can then recognize things
214+
// like memcpy. If this ends up being useful for many targets, we should drop
215+
// this flag and make a code generation option that can be controlled
216+
// independently of the opt level and exposed through the frontend.
217+
static cl::opt<bool> EnableLoopHeaderDuplication(
218+
"enable-loop-header-duplication", cl::init(false), cl::Hidden,
219+
cl::desc("Enable loop header duplication at any optimization level"));
220+
212221
static cl::opt<bool>
213222
EnableDFAJumpThreading("enable-dfa-jump-thread",
214223
cl::desc("Enable DFA jump threading"),
@@ -630,8 +639,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
630639
/*AllowSpeculation=*/false));
631640

632641
// Disable header duplication in loop rotation at -Oz, unless it is explicitly
// requested via -enable-loop-header-duplication.
633-
LPM1.addPass(
634-
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
642+
LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
643+
Level != OptimizationLevel::Oz,
644+
isLTOPreLink(Phase)));
635645
// TODO: Investigate promotion cap for O1.
636646
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
637647
/*AllowSpeculation=*/true));
@@ -812,7 +822,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
812822
// Disable header duplication in loop rotation at -Oz, unless it is explicitly
// requested via -enable-loop-header-duplication.
813823
MPM.addPass(createModuleToFunctionPassAdaptor(
814824
createFunctionToLoopPassAdaptor(
815-
LoopRotatePass(Level != OptimizationLevel::Oz),
825+
LoopRotatePass(EnableLoopHeaderDuplication ||
826+
Level != OptimizationLevel::Oz),
816827
/*UseMemorySSA=*/false,
817828
/*UseBlockFrequencyInfo=*/false),
818829
PTO.EagerlyInvalidateAnalyses));
@@ -1422,7 +1433,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
14221433
LoopPassManager LPM;
14231434
// First rotate loops that may have been un-rotated by prior passes.
14241435
// Disable header duplication at -Oz, unless it is explicitly requested via
// -enable-loop-header-duplication.
1425-
LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1436+
LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
1437+
Level != OptimizationLevel::Oz,
1438+
LTOPreLink));
14261439
// Some loops may have become dead by now. Try to delete them.
14271440
// FIXME: see discussion in https://reviews.llvm.org/D112851,
14281441
// this may need to be revisited once we run GVN before loop deletion

llvm/test/Transforms/LoopRotate/oz-disable.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
55
; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
66

7+
;; Make sure -enable-loop-header-duplication overrides the default behavior at Oz.
8+
; RUN: opt < %s -S -passes='default<Oz>' -enable-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
9+
710
; Loop should be rotated for -Os, and for -Oz when -enable-loop-header-duplication is passed, but not for plain -Oz.
811
; OS: rotating Loop at depth 1
912
; OZ-NOT: rotating Loop at depth 1
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
3+
;; Check that -enable-loop-header-duplication at Oz enables certain types of
4+
;; optimizations, for example replacing the loop body with a call to memset. If
5+
;; loop idiom recognition begins to recognize unrotated loops, this test will
6+
;; need to be updated.
7+
8+
; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s --check-prefix=NOROTATION
9+
; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication < %s | FileCheck %s --check-prefix=ROTATION
10+
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=ROTATION
11+
12+
define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
13+
; NOROTATION-LABEL: define void @test(
14+
; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
15+
; NOROTATION-NEXT: entry:
16+
; NOROTATION-NEXT: br label [[LOOP_HEADER:%.*]]
17+
; NOROTATION: loop.header:
18+
; NOROTATION-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
19+
; NOROTATION-NEXT: [[_12_I:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
20+
; NOROTATION-NEXT: br i1 [[_12_I]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
21+
; NOROTATION: loop.latch:
22+
; NOROTATION-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
23+
; NOROTATION-NEXT: store i8 1, ptr [[PTR_IV]], align 1
24+
; NOROTATION-NEXT: br label [[LOOP_HEADER]]
25+
; NOROTATION: exit:
26+
; NOROTATION-NEXT: ret void
27+
;
28+
; ROTATION-LABEL: define void @test(
29+
; ROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
30+
; ROTATION-NEXT: entry:
31+
; ROTATION-NEXT: [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]]
32+
; ROTATION-NEXT: br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]]
33+
; ROTATION: loop.latch.preheader:
34+
; ROTATION-NEXT: [[END3:%.*]] = ptrtoint ptr [[END]] to i64
35+
; ROTATION-NEXT: [[START4:%.*]] = ptrtoint ptr [[START]] to i64
36+
; ROTATION-NEXT: [[TMP0:%.*]] = sub i64 [[END3]], [[START4]]
37+
; ROTATION-NEXT: tail call void @llvm.memset.p0.i64(ptr nonnull align 1 [[START]], i8 1, i64 [[TMP0]], i1 false)
38+
; ROTATION-NEXT: br label [[EXIT]]
39+
; ROTATION: exit:
40+
; ROTATION-NEXT: ret void
41+
;
42+
entry:
43+
br label %loop.header
44+
45+
loop.header:
46+
%ptr.iv = phi i8* [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
47+
%_12.i = icmp eq i8* %ptr.iv, %end
48+
br i1 %_12.i, label %exit, label %loop.latch
49+
50+
loop.latch:
51+
%ptr.iv.next = getelementptr inbounds i8, i8* %ptr.iv, i64 1
52+
store i8 1, i8* %ptr.iv, align 1
53+
br label %loop.header
54+
55+
exit:
56+
ret void
57+
}

0 commit comments

Comments
 (0)