Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 2f9c8a0

Browse files
author
Fedor Sergeev
committed
[LoopUnroll] allow customization for new-pass-manager version of LoopUnroll
Unlike its legacy counterpart, the new pass manager's LoopUnrollPass does not provide any means to select which flavors of unroll to run (runtime, peeling, partial), relying on global defaults. In some cases, the ability to run a restricted LoopUnroll that does more than LoopFullUnroll is needed. Introduced LoopUnrollOptions to select optional unroll behaviors. Added 'unroll<peeling;no-runtime>' to PassRegistry mainly for the sake of testing. Reviewers: chandlerc, tejohnson Differential Revision: https://reviews.llvm.org/D53440 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@345723 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ea1119a commit 2f9c8a0

File tree

6 files changed

+104
-16
lines changed

6 files changed

+104
-16
lines changed

include/llvm/Transforms/Scalar/LoopUnrollPass.h

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
1111
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
1212

13+
#include "llvm/ADT/Optional.h"
1314
#include "llvm/Analysis/LoopAnalysisManager.h"
1415
#include "llvm/IR/PassManager.h"
1516

@@ -30,16 +31,71 @@ class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
3031
LoopStandardAnalysisResults &AR, LPMUpdater &U);
3132
};
3233

34+
/// A set of parameters used to control various transforms performed by the
35+
/// LoopUnroll pass. Each of the boolean parameters can be set to:
36+
/// true - enabling the transformation.
37+
/// false - disabling the transformation.
38+
/// None - relying on a global default.
39+
///
40+
/// There is also an OptLevel parameter, which is used for additional loop
41+
/// unroll tuning.
42+
///
43+
/// Intended use is to create a default object, modify parameters with
44+
/// additional setters and then pass it to LoopUnrollPass.
45+
///
46+
struct LoopUnrollOptions {
47+
Optional<bool> AllowPartial;
48+
Optional<bool> AllowPeeling;
49+
Optional<bool> AllowRuntime;
50+
Optional<bool> AllowUpperBound;
51+
int OptLevel;
52+
53+
LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {}
54+
55+
/// Enables or disables partial unrolling. When disabled only full unrolling
56+
/// is allowed.
57+
LoopUnrollOptions &setPartial(bool Partial) {
58+
AllowPartial = Partial;
59+
return *this;
60+
}
61+
62+
/// Enables or disables unrolling of loops with runtime trip count.
63+
LoopUnrollOptions &setRuntime(bool Runtime) {
64+
AllowRuntime = Runtime;
65+
return *this;
66+
}
67+
68+
/// Enables or disables loop peeling.
69+
LoopUnrollOptions &setPeeling(bool Peeling) {
70+
AllowPeeling = Peeling;
71+
return *this;
72+
}
73+
74+
/// Enables or disables the use of trip count upper bound
75+
/// in loop unrolling.
76+
LoopUnrollOptions &setUpperBound(bool UpperBound) {
77+
AllowUpperBound = UpperBound;
78+
return *this;
79+
}
80+
81+
/// Sets the "optimization level" tuning parameter for loop unrolling.
82+
LoopUnrollOptions &setOptLevel(int O) {
83+
OptLevel = O;
84+
return *this;
85+
}
86+
};
87+
3388
/// Loop unroll pass that will support both full and partial unrolling.
3489
/// It is a function pass to have access to function and module analyses.
3590
/// It will also put loops into canonical form (simplified and LCSSA).
3691
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
37-
const int OptLevel;
92+
LoopUnrollOptions UnrollOpts;
3893

3994
public:
4095
/// This uses the target information (or flags) to control the thresholds for
4196
/// different unrolling strategies but supports all of them.
42-
explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
97+
explicit LoopUnrollPass(LoopUnrollOptions UnrollOpts = {})
98+
: UnrollOpts(UnrollOpts) {}
4399

44100
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
45101
};

lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
830830
OptimizePM.addPass(
831831
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
832832
}
833-
OptimizePM.addPass(LoopUnrollPass(Level));
833+
OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
834834
OptimizePM.addPass(InstCombinePass());
835835
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
836836
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));

lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ FUNCTION_PASS("sroa", SROA())
215215
FUNCTION_PASS("tailcallelim", TailCallElimPass())
216216
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
217217
FUNCTION_PASS("unroll", LoopUnrollPass())
218+
FUNCTION_PASS("unroll<peeling;no-runtime>",LoopUnrollPass(LoopUnrollOptions().setPeeling(true).setRuntime(false)))
218219
FUNCTION_PASS("verify", VerifierPass())
219220
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
220221
FUNCTION_PASS("verify<loops>", LoopVerifierPass())

lib/Transforms/Scalar/LoopUnrollPass.cpp

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,23 +1333,20 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
13331333
Loop *ParentL = L.getParentLoop();
13341334
#endif
13351335

1336-
// The API here is quite complex to call, but there are only two interesting
1337-
// states we support: partial and full (or "simple") unrolling. However, to
1338-
// enable these things we actually pass "None" in for the optional to avoid
1339-
// providing an explicit choice.
1340-
Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
1341-
AllowPeeling;
13421336
// Check if the profile summary indicates that the profiled application
13431337
// has a huge working set size, in which case we disable peeling to avoid
13441338
// bloating it further.
1339+
Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
13451340
if (PSI && PSI->hasHugeWorkingSetSize())
1346-
AllowPeeling = false;
1341+
LocalAllowPeeling = false;
13471342
std::string LoopName = L.getName();
1348-
LoopUnrollResult Result =
1349-
tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
1350-
/*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1351-
/*Threshold*/ None, AllowPartialParam, RuntimeParam,
1352-
UpperBoundParam, AllowPeeling);
1343+
// The API here is quite complex to call, and some flavors of unrolling
1344+
// can be selected at construction time (by setting UnrollOpts).
1345+
LoopUnrollResult Result = tryToUnrollLoop(
1346+
&L, DT, &LI, SE, TTI, AC, ORE,
1347+
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None,
1348+
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1349+
UnrollOpts.AllowUpperBound, LocalAllowPeeling);
13531350
Changed |= Result != LoopUnrollResult::Unmodified;
13541351

13551352
// The parent must not be damaged by unrolling!

test/Transforms/LoopUnroll/peel-loop.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -verify-dom-info -simplifycfg -instcombine | FileCheck %s
2+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
3+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
24

35
; Basic loop peeling - check that we can peel-off the first 3 loop iterations
46
; when explicitly requested.

test/Transforms/LoopUnroll/runtime-loop.ll

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
22
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
3-
3+
;
44
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
55
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
6+
;
7+
; Restricted versions of unroll (unroll<peeling;no-runtime>, unroll-full) should not be doing runtime unrolling
8+
; even if it is globally enabled through the -unroll-runtime option
9+
;
10+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
11+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
12+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
13+
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
614

715
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
816

@@ -14,22 +22,32 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
1422
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
1523
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
1624

25+
; NOEPILOG-NOT: %xtraiter = and i32 %n
26+
1727
; PROLOG: %xtraiter = and i32 %n
1828
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
1929
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
2030

31+
; NOPROLOG-NOT: %xtraiter = and i32 %n
32+
2133
; EPILOG: for.body.epil:
2234
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
2335
; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1
2436
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
2537
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
2638

39+
; NOEPILOG: for.body:
40+
; NOEPILOG-NOT: for.body.epil:
41+
2742
; PROLOG: for.body.prol:
2843
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
2944
; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1
3045
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
3146
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
3247

48+
; NOPROLOG: for.body:
49+
; NOPROLOG-NOT: for.body.prol:
50+
3351

3452
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
3553
entry:
@@ -86,6 +104,8 @@ for.end: ; preds = %for.body
86104
; COMMON-LABEL: @foo(
87105
; EPILOG: bb72.2:
88106
; PROLOG: bb72.2:
107+
; NOEPILOG-NOT: bb72.2:
108+
; NOPROLOG-NOT: bb72.2:
89109

90110
define void @foo(i32 %trips) {
91111
entry:
@@ -111,9 +131,15 @@ cond_true138:
111131
; EPILOG: for.body.epil:
112132
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
113133

134+
; NOEPILOG: for.body:
135+
; NOEPILOG-NOT: for.body.epil:
136+
114137
; PROLOG: for.body.prol:
115138
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
116139

140+
; NOPROLOG: for.body:
141+
; NOPROLOG-NOT: for.body.prol:
142+
117143
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
118144
entry:
119145
%cmp2 = icmp eq i32 %len, 0
@@ -146,9 +172,15 @@ for.end: ; preds = %for.cond.for.end_cr
146172
; EPILOG: for.body:
147173
; EPILOG-NOT: for.body.epil:
148174

175+
; NOEPILOG: for.body:
176+
; NOEPILOG-NOT: for.body.epil:
177+
149178
; PROLOG: for.body:
150179
; PROLOG-NOT: for.body.prol:
151180

181+
; NOPROLOG: for.body:
182+
; NOPROLOG-NOT: for.body.prol:
183+
152184
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
153185
entry:
154186
%cmp2 = icmp eq i32 %len, 0

0 commit comments

Comments
 (0)