21
21
#include " llvm/ADT/Statistic.h"
22
22
#include " llvm/Analysis/AssumptionCache.h"
23
23
#include " llvm/Analysis/LoopInfo.h"
24
+ #include " llvm/Analysis/OptimizationRemarkEmitter.h"
24
25
#include " llvm/Analysis/ScalarEvolution.h"
25
26
#include " llvm/Analysis/ScalarEvolutionExpander.h"
26
27
#include " llvm/Analysis/TargetTransformInfo.h"
@@ -75,8 +76,44 @@ ForceGuardLoopEntry(
75
76
76
77
STATISTIC (NumHWLoops, " Number of loops converted to hardware loops" );
77
78
79
+ #ifndef NDEBUG
80
+ static void debugHWLoopFailure (const StringRef DebugMsg,
81
+ Instruction *I) {
82
+ dbgs () << " HWLoops: " << DebugMsg;
83
+ if (I)
84
+ dbgs () << ' ' << *I;
85
+ else
86
+ dbgs () << ' .' ;
87
+ dbgs () << ' \n ' ;
88
+ }
89
+ #endif
90
+
91
+ static OptimizationRemarkAnalysis
92
+ createHWLoopAnalysis (StringRef RemarkName, Loop *L, Instruction *I) {
93
+ Value *CodeRegion = L->getHeader ();
94
+ DebugLoc DL = L->getStartLoc ();
95
+
96
+ if (I) {
97
+ CodeRegion = I->getParent ();
98
+ // If there is no debug location attached to the instruction, revert back to
99
+ // using the loop's.
100
+ if (I->getDebugLoc ())
101
+ DL = I->getDebugLoc ();
102
+ }
103
+
104
+ OptimizationRemarkAnalysis R (DEBUG_TYPE, RemarkName, DL, CodeRegion);
105
+ R << " hardware-loop not created: " ;
106
+ return R;
107
+ }
108
+
78
109
namespace {
79
110
111
+ void reportHWLoopFailure (const StringRef Msg, const StringRef ORETag,
112
+ OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr ) {
113
+ LLVM_DEBUG (debugHWLoopFailure (Msg, I));
114
+ ORE->emit (createHWLoopAnalysis (ORETag, TheLoop, I) << Msg);
115
+ }
116
+
80
117
using TTI = TargetTransformInfo;
81
118
82
119
class HardwareLoops : public FunctionPass {
@@ -97,6 +134,7 @@ namespace {
97
134
AU.addRequired <ScalarEvolutionWrapperPass>();
98
135
AU.addRequired <AssumptionCacheTracker>();
99
136
AU.addRequired <TargetTransformInfoWrapperPass>();
137
+ AU.addRequired <OptimizationRemarkEmitterWrapperPass>();
100
138
}
101
139
102
140
// Try to convert the given Loop into a hardware loop.
@@ -110,6 +148,7 @@ namespace {
110
148
ScalarEvolution *SE = nullptr ;
111
149
LoopInfo *LI = nullptr ;
112
150
const DataLayout *DL = nullptr ;
151
+ OptimizationRemarkEmitter *ORE = nullptr ;
113
152
const TargetTransformInfo *TTI = nullptr ;
114
153
DominatorTree *DT = nullptr ;
115
154
bool PreserveLCSSA = false ;
@@ -143,8 +182,9 @@ namespace {
143
182
144
183
public:
145
184
HardwareLoop (HardwareLoopInfo &Info, ScalarEvolution &SE,
146
- const DataLayout &DL) :
147
- SE (SE), DL(DL), L(Info.L), M(L->getHeader ()->getModule()),
185
+ const DataLayout &DL,
186
+ OptimizationRemarkEmitter *ORE) :
187
+ SE (SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader ()->getModule()),
148
188
ExitCount(Info.ExitCount),
149
189
CountType(Info.CountType),
150
190
ExitBranch(Info.ExitBranch),
@@ -157,6 +197,7 @@ namespace {
157
197
private:
158
198
ScalarEvolution &SE;
159
199
const DataLayout &DL;
200
+ OptimizationRemarkEmitter *ORE = nullptr ;
160
201
Loop *L = nullptr ;
161
202
Module *M = nullptr ;
162
203
const SCEV *ExitCount = nullptr ;
@@ -182,6 +223,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
182
223
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree ();
183
224
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F);
184
225
DL = &F.getParent ()->getDataLayout ();
226
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE ();
185
227
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
186
228
LibInfo = TLIP ? &TLIP->getTLI (F) : nullptr ;
187
229
PreserveLCSSA = mustPreserveAnalysisID (LCSSAID);
@@ -201,31 +243,39 @@ bool HardwareLoops::runOnFunction(Function &F) {
201
243
// converted and the parent loop doesn't support containing a hardware loop.
202
244
bool HardwareLoops::TryConvertLoop(Loop *L) {
  // Process nested loops first. As soon as one sub-loop reports true, this
  // loop is reported as not convertible and the search stops.
  for (Loop *SubLoop : *L) {
    if (TryConvertLoop(SubLoop)) {
      reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
                          ORE, L);
      return true; // Stop search.
    }
  }

  HardwareLoopInfo HWLoopInfo(L);
  if (!HWLoopInfo.canAnalyze(*LI)) {
    reportHWLoopFailure("cannot analyze loop, irreducible control flow",
                        "HWLoopCannotAnalyze", ORE, L);
    return false;
  }

  // NOTE(review): with ForceHardwareLoops set, the TTI hook is short-circuited
  // and never sees HWLoopInfo for this loop; the previous form of this check
  // always queried TTI first. Confirm nothing below relies on the hook having
  // filled in HWLoopInfo in the forced case.
  if (!ForceHardwareLoops &&
      !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
    reportHWLoopFailure("it's not profitable to create a hardware-loop",
                        "HWLoopNotProfitable", ORE, L);
    return false;
  }

  // Allow overriding of the counter width and loop decrement value.
  if (CounterBitWidth.getNumOccurrences())
    HWLoopInfo.CountType =
        IntegerType::get(M->getContext(), CounterBitWidth);

  // The decrement constant is created with whatever CountType holds at this
  // point, so the width override above has to be applied first.
  if (LoopDecrement.getNumOccurrences())
    HWLoopInfo.LoopDecrement =
        ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);

  MadeChange |= TryConvertLoop(HWLoopInfo);
  return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
}
230
280
231
281
bool HardwareLoops::TryConvertLoop (HardwareLoopInfo &HWLoopInfo) {
@@ -234,8 +284,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
234
284
LLVM_DEBUG (dbgs () << " HWLoops: Try to convert profitable loop: " << *L);
235
285
236
286
if (!HWLoopInfo.isHardwareLoopCandidate (*SE, *LI, *DT, ForceNestedLoop,
237
- ForceHardwareLoopPHI))
287
+ ForceHardwareLoopPHI)) {
288
+ // TODO: there can be many reasons a loop is not considered a
289
+ // candidate, so we should let isHardwareLoopCandidate fill in the
290
+ // reason and then report a better message here.
291
+ reportHWLoopFailure (" loop is not a candidate" , " HWLoopNoCandidate" , ORE, L);
238
292
return false ;
293
+ }
239
294
240
295
assert (
241
296
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount ) &&
@@ -249,18 +304,21 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
249
304
if (!Preheader)
250
305
return false ;
251
306
252
- HardwareLoop HWLoop (HWLoopInfo, *SE, *DL);
307
+ HardwareLoop HWLoop (HWLoopInfo, *SE, *DL, ORE );
253
308
HWLoop.Create ();
254
309
++NumHWLoops;
255
310
return true ;
256
311
}
257
312
258
313
void HardwareLoop::Create () {
259
314
LLVM_DEBUG (dbgs () << " HWLoops: Converting loop..\n " );
260
-
315
+
261
316
Value *LoopCountInit = InitLoopCount ();
262
- if (!LoopCountInit)
317
+ if (!LoopCountInit) {
318
+ reportHWLoopFailure (" could not safely create a loop count expression" ,
319
+ " HWLoopNotSafe" , ORE, L);
263
320
return ;
321
+ }
264
322
265
323
InsertIterationSetup (LoopCountInit);
266
324
0 commit comments