Skip to content

Commit ad76375

Browse files
author
Sjoerd Meijer
committed
[HardwareLoops] Optimisation remarks
This adds the initial plumbing to support optimisation remarks in the IR hardware-loop pass. I have left a TODO comment marking where the reporting can be improved, and will iterate on that now that this initial support is in. Differential Revision: https://reviews.llvm.org/D68579 llvm-svn: 374980
1 parent 8af5ada commit ad76375

File tree

4 files changed

+97
-25
lines changed

4 files changed

+97
-25
lines changed

llvm/lib/CodeGen/HardwareLoops.cpp

Lines changed: 81 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/ADT/Statistic.h"
2222
#include "llvm/Analysis/AssumptionCache.h"
2323
#include "llvm/Analysis/LoopInfo.h"
24+
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2425
#include "llvm/Analysis/ScalarEvolution.h"
2526
#include "llvm/Analysis/ScalarEvolutionExpander.h"
2627
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -75,8 +76,44 @@ ForceGuardLoopEntry(
7576

7677
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
7778

79+
#ifndef NDEBUG
80+
static void debugHWLoopFailure(const StringRef DebugMsg,
81+
Instruction *I) {
82+
dbgs() << "HWLoops: " << DebugMsg;
83+
if (I)
84+
dbgs() << ' ' << *I;
85+
else
86+
dbgs() << '.';
87+
dbgs() << '\n';
88+
}
89+
#endif
90+
91+
static OptimizationRemarkAnalysis
92+
createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
93+
Value *CodeRegion = L->getHeader();
94+
DebugLoc DL = L->getStartLoc();
95+
96+
if (I) {
97+
CodeRegion = I->getParent();
98+
// If there is no debug location attached to the instruction, revert back to
99+
// using the loop's.
100+
if (I->getDebugLoc())
101+
DL = I->getDebugLoc();
102+
}
103+
104+
OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
105+
R << "hardware-loop not created: ";
106+
return R;
107+
}
108+
78109
namespace {
79110

111+
void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
112+
OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
113+
LLVM_DEBUG(debugHWLoopFailure(Msg, I));
114+
ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
115+
}
116+
80117
using TTI = TargetTransformInfo;
81118

82119
class HardwareLoops : public FunctionPass {
@@ -97,6 +134,7 @@ namespace {
97134
AU.addRequired<ScalarEvolutionWrapperPass>();
98135
AU.addRequired<AssumptionCacheTracker>();
99136
AU.addRequired<TargetTransformInfoWrapperPass>();
137+
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
100138
}
101139

102140
// Try to convert the given Loop into a hardware loop.
@@ -110,6 +148,7 @@ namespace {
110148
ScalarEvolution *SE = nullptr;
111149
LoopInfo *LI = nullptr;
112150
const DataLayout *DL = nullptr;
151+
OptimizationRemarkEmitter *ORE = nullptr;
113152
const TargetTransformInfo *TTI = nullptr;
114153
DominatorTree *DT = nullptr;
115154
bool PreserveLCSSA = false;
@@ -143,8 +182,9 @@ namespace {
143182

144183
public:
145184
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
146-
const DataLayout &DL) :
147-
SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
185+
const DataLayout &DL,
186+
OptimizationRemarkEmitter *ORE) :
187+
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
148188
ExitCount(Info.ExitCount),
149189
CountType(Info.CountType),
150190
ExitBranch(Info.ExitBranch),
@@ -157,6 +197,7 @@ namespace {
157197
private:
158198
ScalarEvolution &SE;
159199
const DataLayout &DL;
200+
OptimizationRemarkEmitter *ORE = nullptr;
160201
Loop *L = nullptr;
161202
Module *M = nullptr;
162203
const SCEV *ExitCount = nullptr;
@@ -182,6 +223,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
182223
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
183224
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
184225
DL = &F.getParent()->getDataLayout();
226+
ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
185227
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
186228
LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
187229
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
@@ -201,31 +243,39 @@ bool HardwareLoops::runOnFunction(Function &F) {
201243
// converted and the parent loop doesn't support containing a hardware loop.
202244
bool HardwareLoops::TryConvertLoop(Loop *L) {
203245
// Process nested loops first.
204-
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
205-
if (TryConvertLoop(*I))
246+
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
247+
if (TryConvertLoop(*I)) {
248+
reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
249+
ORE, L);
206250
return true; // Stop search.
251+
}
252+
}
207253

208254
HardwareLoopInfo HWLoopInfo(L);
209-
if (!HWLoopInfo.canAnalyze(*LI))
255+
if (!HWLoopInfo.canAnalyze(*LI)) {
256+
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
257+
"HWLoopCannotAnalyze", ORE, L);
210258
return false;
259+
}
211260

212-
if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
213-
ForceHardwareLoops) {
214-
215-
// Allow overriding of the counter width and loop decrement value.
216-
if (CounterBitWidth.getNumOccurrences())
217-
HWLoopInfo.CountType =
218-
IntegerType::get(M->getContext(), CounterBitWidth);
261+
if (!ForceHardwareLoops &&
262+
!TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
263+
reportHWLoopFailure("it's not profitable to create a hardware-loop",
264+
"HWLoopNotProfitable", ORE, L);
265+
return false;
266+
}
219267

220-
if (LoopDecrement.getNumOccurrences())
221-
HWLoopInfo.LoopDecrement =
222-
ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
268+
// Allow overriding of the counter width and loop decrement value.
269+
if (CounterBitWidth.getNumOccurrences())
270+
HWLoopInfo.CountType =
271+
IntegerType::get(M->getContext(), CounterBitWidth);
223272

224-
MadeChange |= TryConvertLoop(HWLoopInfo);
225-
return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
226-
}
273+
if (LoopDecrement.getNumOccurrences())
274+
HWLoopInfo.LoopDecrement =
275+
ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
227276

228-
return false;
277+
MadeChange |= TryConvertLoop(HWLoopInfo);
278+
return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
229279
}
230280

231281
bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
@@ -234,8 +284,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
234284
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
235285

236286
if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
237-
ForceHardwareLoopPHI))
287+
ForceHardwareLoopPHI)) {
288+
// TODO: there can be many reasons a loop is not considered a
289+
// candidate, so we should let isHardwareLoopCandidate fill in the
290+
// reason and then report a better message here.
291+
reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
238292
return false;
293+
}
239294

240295
assert(
241296
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
@@ -249,18 +304,21 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
249304
if (!Preheader)
250305
return false;
251306

252-
HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
307+
HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
253308
HWLoop.Create();
254309
++NumHWLoops;
255310
return true;
256311
}
257312

258313
void HardwareLoop::Create() {
259314
LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
260-
315+
261316
Value *LoopCountInit = InitLoopCount();
262-
if (!LoopCountInit)
317+
if (!LoopCountInit) {
318+
reportHWLoopFailure("could not safely create a loop count expression",
319+
"HWLoopNotSafe", ORE, L);
263320
return;
321+
}
264322

265323
InsertIterationSetup(LoopCountInit);
266324

llvm/test/CodeGen/ARM/O3-pipeline.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@
5252
; CHECK-NEXT: Dominator Tree Construction
5353
; CHECK-NEXT: Natural Loop Information
5454
; CHECK-NEXT: Scalar Evolution Analysis
55+
; CHECK-NEXT: Lazy Branch Probability Analysis
56+
; CHECK-NEXT: Lazy Block Frequency Analysis
57+
; CHECK-NEXT: Optimization Remark Emitter
5558
; CHECK-NEXT: Hardware Loop Insertion
5659
; CHECK-NEXT: Scalar Evolution Analysis
5760
; CHECK-NEXT: Loop Pass Manager

llvm/test/Transforms/HardwareLoops/ARM/structure.ll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops %s -S -o - | FileCheck %s
2-
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC
2+
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - -pass-remarks-analysis=hardware-loops 2>&1 | FileCheck %s --check-prefix=CHECK-LLC
33
; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
44

5+
; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
56
; CHECK-LABEL: early_exit
67
; CHECK-NOT: llvm.set.loop.iterations
78
; CHECK-NOT: llvm.loop.decrement
@@ -46,6 +47,7 @@ do.end:
4647
; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
4748
; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7
4849

50+
; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
4951
; CHECK-LLC: nested:
5052
; CHECK-LLC-NOT: mov lr, r1
5153
; CHECK-LLC: dls lr, r1
@@ -176,6 +178,9 @@ while.end7:
176178
ret void
177179
}
178180

181+
182+
; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
183+
; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
179184
; CHECK-LABEL: not_rotated
180185
; CHECK-NOT: call void @llvm.set.loop.iterations
181186
; CHECK-NOT: call i32 @llvm.loop.decrement.i32

llvm/test/Transforms/HardwareLoops/unconditional-latch.ll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
22
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
3-
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
3+
;
4+
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 \
5+
; RUN: -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true \
6+
; RUN: -hardware-loops -S -pass-remarks-analysis=hardware-loops %s -o - \
7+
; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LATCH
8+
9+
; CHECK-LATCH: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
410

511
; CHECK-LABEL: not_rotated
612
; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations

0 commit comments

Comments
 (0)