15
15
#include " GCNSubtarget.h"
16
16
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
17
17
#include " llvm/CodeGen/MachineFunctionPass.h"
18
+ #include " llvm/CodeGen/MachineInstr.h"
19
+ #include " llvm/CodeGen/MachineTraceMetrics.h"
18
20
#include " llvm/CodeGen/TargetSchedule.h"
21
+ #include " llvm/InitializePasses.h"
19
22
#include " llvm/Support/BranchProbability.h"
20
23
21
24
using namespace llvm ;
@@ -29,6 +32,13 @@ class SIPreEmitPeephole : public MachineFunctionPass {
29
32
const SIInstrInfo *TII = nullptr ;
30
33
const SIRegisterInfo *TRI = nullptr ;
31
34
35
+ // Trace metrics analysis result, used to estimate the number of cycles it
36
+ // takes to execute a block. For simplicity, initialized with TS_Local
37
+ // strategy for the traces to have a single block. Then, getCriticalPath and
38
+ // getResourceDepth give the results for a single block (instead of for a
39
+ // whole trace).
40
+ MachineTraceMetrics::Ensemble *Traces;
41
+
32
42
bool optimizeVccBranch (MachineInstr &MI) const ;
33
43
bool optimizeSetGPR (MachineInstr &First, MachineInstr &MI) const ;
34
44
bool getBlockDestinations (MachineBasicBlock &SrcMBB,
@@ -37,9 +47,14 @@ class SIPreEmitPeephole : public MachineFunctionPass {
37
47
SmallVectorImpl<MachineOperand> &Cond);
38
48
bool mustRetainExeczBranch (const MachineInstr &Branch,
39
49
const MachineBasicBlock &From,
40
- const MachineBasicBlock &To) const ;
50
+ const MachineBasicBlock &To);
41
51
bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
42
52
53
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
54
+ AU.addRequired <MachineTraceMetrics>();
55
+ MachineFunctionPass::getAnalysisUsage (AU);
56
+ }
57
+
43
58
public:
44
59
static char ID;
45
60
@@ -52,8 +67,11 @@ class SIPreEmitPeephole : public MachineFunctionPass {
52
67
53
68
} // End anonymous namespace.
54
69
55
- INITIALIZE_PASS (SIPreEmitPeephole, DEBUG_TYPE,
56
- " SI peephole optimizations" , false , false )
70
// Pass registration. The BEGIN/END pair brackets the dependency list, which
// names MachineTraceMetrics -- the same analysis getAnalysisUsage requires.
+ INITIALIZE_PASS_BEGIN (SIPreEmitPeephole, DEBUG_TYPE,
71
+ " SI peephole optimizations" , false , false )
72
+ INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
73
+ INITIALIZE_PASS_END(SIPreEmitPeephole, DEBUG_TYPE, " SI peephole optimizations" ,
74
+ false , false )
57
75
58
76
// Out-of-class definition of the static ID member declared in the class.
char SIPreEmitPeephole::ID = 0;
59
77
@@ -299,60 +317,23 @@ bool SIPreEmitPeephole::getBlockDestinations(
299
317
return true ;
300
318
}
301
319
302
- namespace {
303
- class BranchWeightCostModel {
304
- const SIInstrInfo &TII;
305
- const TargetSchedModel &SchedModel;
306
- BranchProbability BranchProb;
307
- static constexpr uint64_t BranchNotTakenCost = 1 ;
308
- uint64_t BranchTakenCost;
309
- uint64_t ThenCyclesCost = 0 ;
310
-
311
- public:
312
- BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
313
- const MachineBasicBlock &Succ)
314
- : TII(TII), SchedModel(TII.getSchedModel()) {
315
- const MachineBasicBlock &Head = *Branch.getParent ();
316
- const auto *FromIt = find (Head.successors (), &Succ);
317
- assert (FromIt != Head.succ_end ());
318
-
319
- BranchProb = Head.getSuccProbability (FromIt);
320
- if (BranchProb.isUnknown ())
321
- BranchProb = BranchProbability::getZero ();
322
- BranchTakenCost = SchedModel.computeInstrLatency (&Branch, false );
323
- }
324
-
325
- bool isProfitable (const MachineInstr &MI) {
326
- if (TII.isWaitcnt (MI.getOpcode ()))
327
- return false ;
328
-
329
- ThenCyclesCost += SchedModel.computeInstrLatency (&MI, false );
330
-
331
- // Consider `P = N/D` to be the probability of execz being false (skipping
332
- // the then-block) The transformation is profitable if always executing the
333
- // 'then' block is cheaper than executing sometimes 'then' and always
334
- // executing s_cbranch_execz:
335
- // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
336
- // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
337
- // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
338
- // BranchNotTakenCost
339
- uint64_t Numerator = BranchProb.getNumerator ();
340
- uint64_t Denominator = BranchProb.getDenominator ();
341
- return (Denominator - Numerator) * ThenCyclesCost <=
342
- ((Denominator - Numerator) * BranchTakenCost +
343
- Numerator * BranchNotTakenCost);
344
- }
345
- };
346
-
347
- bool SIPreEmitPeephole::mustRetainExeczBranch (
348
- const MachineInstr &Branch, const MachineBasicBlock &From,
349
- const MachineBasicBlock &To) const {
320
+ bool SIPreEmitPeephole::mustRetainExeczBranch (const MachineInstr &Branch,
321
+ const MachineBasicBlock &From,
322
+ const MachineBasicBlock &To) {
350
323
351
324
const MachineBasicBlock &Head = *Branch.getParent ();
352
- assert (is_contained (Head.successors (), &From));
325
+ const auto *FromIt = find (Head.successors (), &From);
326
+ assert (FromIt != Head.succ_end ());
327
+
328
+ auto BranchProb = Head.getSuccProbability (FromIt);
329
+ if (BranchProb.isUnknown ())
330
+ return false ;
353
331
354
- BranchWeightCostModel CostModel{*TII, Branch, From};
332
+ uint64_t BranchTakenCost =
333
+ TII->getSchedModel ().computeInstrLatency (&Branch, false );
334
+ constexpr uint64_t BranchNotTakenCost = 1 ;
355
335
336
+ unsigned ThenCyclesCost = 0 ;
356
337
const MachineFunction *MF = From.getParent ();
357
338
for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
358
339
MBBI != End && MBBI != ToI; ++MBBI) {
@@ -371,14 +352,33 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
371
352
if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
372
353
return true ;
373
354
374
- if (!CostModel. isProfitable (MI))
355
+ if (TII-> isWaitcnt (MI. getOpcode () ))
375
356
return true ;
376
357
}
358
+
359
+ MachineTraceMetrics::Trace Trace = Traces->getTrace (&From);
360
+ ThenCyclesCost +=
361
+ std::max (Trace.getCriticalPath (), Trace.getResourceDepth (true ));
362
+
363
+ // Consider `P = N/D` to be the probability of execz being false (skipping
364
+ // the then-block) The transformation is profitable if always executing the
365
+ // 'then' block is cheaper than executing sometimes 'then' and always
366
+ // executing s_cbranch_execz:
367
+ // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
368
+ // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
369
+ // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
370
+ // BranchNotTakenCost
371
+ uint64_t Numerator = BranchProb.getNumerator ();
372
+ uint64_t Denominator = BranchProb.getDenominator ();
373
+ bool IsProfitable = (Denominator - Numerator) * ThenCyclesCost <=
374
+ ((Denominator - Numerator) * BranchTakenCost +
375
+ Numerator * BranchNotTakenCost);
376
+ if (!IsProfitable)
377
+ return true ;
377
378
}
378
379
379
380
return false ;
380
381
}
381
- } // namespace
382
382
383
383
// Returns true if the skip branch instruction is removed.
384
384
bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
@@ -413,6 +413,8 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
413
413
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
414
414
TII = ST.getInstrInfo ();
415
415
TRI = &TII->getRegisterInfo ();
416
+ Traces = getAnalysis<MachineTraceMetrics>().getEnsemble (
417
+ llvm::MachineTraceStrategy::TS_Local);
416
418
bool Changed = false ;
417
419
418
420
MF.RenumberBlocks ();
0 commit comments