15
15
#include " GCNSubtarget.h"
16
16
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
17
17
#include " llvm/CodeGen/MachineFunctionPass.h"
18
+ #include " llvm/CodeGen/MachineInstr.h"
19
+ #include " llvm/CodeGen/MachineTraceMetrics.h"
18
20
#include " llvm/CodeGen/TargetSchedule.h"
21
+ #include " llvm/InitializePasses.h"
19
22
#include " llvm/Support/BranchProbability.h"
20
23
21
24
using namespace llvm ;
@@ -28,6 +31,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
28
31
private:
29
32
const SIInstrInfo *TII = nullptr ;
30
33
const SIRegisterInfo *TRI = nullptr ;
34
+ MachineTraceMetrics *Traces = nullptr ;
35
+ MachineTraceMetrics::Ensemble *MinInstr;
31
36
32
37
bool optimizeVccBranch (MachineInstr &MI) const ;
33
38
bool optimizeSetGPR (MachineInstr &First, MachineInstr &MI) const ;
@@ -37,9 +42,14 @@ class SIPreEmitPeephole : public MachineFunctionPass {
37
42
SmallVectorImpl<MachineOperand> &Cond);
38
43
bool mustRetainExeczBranch (const MachineInstr &Branch,
39
44
const MachineBasicBlock &From,
40
- const MachineBasicBlock &To) const ;
45
+ const MachineBasicBlock &To);
41
46
bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
42
47
48
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
49
+ AU.addRequired <MachineTraceMetrics>();
50
+ MachineFunctionPass::getAnalysisUsage (AU);
51
+ }
52
+
43
53
public:
44
54
static char ID;
45
55
@@ -52,8 +62,11 @@ class SIPreEmitPeephole : public MachineFunctionPass {
52
62
53
63
} // End anonymous namespace.
54
64
55
- INITIALIZE_PASS (SIPreEmitPeephole, DEBUG_TYPE,
56
- " SI peephole optimizations" , false , false )
65
+ INITIALIZE_PASS_BEGIN (SIPreEmitPeephole, DEBUG_TYPE,
66
+ " SI peephole optimizations" , false , false )
67
+ INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
68
+ INITIALIZE_PASS_END(SIPreEmitPeephole, DEBUG_TYPE, " SI peephole optimizations" ,
69
+ false , false )
57
70
58
71
char SIPreEmitPeephole::ID = 0;
59
72
@@ -299,61 +312,21 @@ bool SIPreEmitPeephole::getBlockDestinations(
299
312
return true ;
300
313
}
301
314
302
- namespace {
303
- class BranchWeightCostModel {
304
- const SIInstrInfo &TII;
305
- const TargetSchedModel &SchedModel;
306
- BranchProbability BranchProb;
307
- static constexpr uint64_t BranchNotTakenCost = 1 ;
308
- uint64_t BranchTakenCost;
309
- uint64_t ThenCyclesCost = 0 ;
310
-
311
- public:
312
- BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
313
- const MachineBasicBlock &Succ)
314
- : TII(TII), SchedModel(TII.getSchedModel()) {
315
- assert (SchedModel.hasInstrSchedModelOrItineraries ());
316
-
317
- const MachineBasicBlock &Head = *Branch.getParent ();
318
- const auto *FromIt = find (Head.successors (), &Succ);
319
- assert (FromIt != Head.succ_end ());
320
-
321
- BranchProb = Head.getSuccProbability (FromIt);
322
- assert (!BranchProb.isUnknown ());
323
- BranchTakenCost = SchedModel.computeInstrLatency (&Branch, false );
324
- }
325
-
326
- bool isProfitable (const MachineInstr &MI) {
327
- if (TII.isWaitcnt (MI.getOpcode ()))
328
- return false ;
329
-
330
- ThenCyclesCost += SchedModel.computeInstrLatency (&MI, false );
331
-
332
- // Consider `P = N/D` to be the probability of execz being false (skipping
333
- // the then-block) The transformation is profitable if always executing the
334
- // 'then' block is cheaper than executing sometimes 'then' and always
335
- // executing s_cbranch_execz:
336
- // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNonTakenCost
337
- // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNonTakenCost
338
- // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
339
- // BranchNonTakenCost
340
- uint64_t Numerator = BranchProb.getNumerator ();
341
- uint64_t Denominator = BranchProb.getDenominator ();
342
- return (Denominator - Numerator) * ThenCyclesCost <=
343
- ((Denominator - Numerator) * BranchTakenCost +
344
- Numerator * BranchNotTakenCost);
345
- }
346
- };
347
-
348
- bool SIPreEmitPeephole::mustRetainExeczBranch (
349
- const MachineInstr &Branch, const MachineBasicBlock &From,
350
- const MachineBasicBlock &To) const {
315
+ bool SIPreEmitPeephole::mustRetainExeczBranch (const MachineInstr &Branch,
316
+ const MachineBasicBlock &From,
317
+ const MachineBasicBlock &To) {
351
318
352
319
const MachineBasicBlock &Head = *Branch.getParent ();
353
- assert (is_contained (Head.successors (), &From));
320
+ const auto *FromIt = find (Head.successors (), &From);
321
+ assert (FromIt != Head.succ_end ());
354
322
355
- BranchWeightCostModel CostModel{*TII, Branch, From};
323
+ auto BranchProb = Head.getSuccProbability (FromIt);
324
+ assert (!BranchProb.isUnknown ());
325
+ uint64_t BranchTakenCost =
326
+ TII->getSchedModel ().computeInstrLatency (&Branch, false );
327
+ constexpr uint64_t BranchNotTakenCost = 1 ;
356
328
329
+ unsigned ThenCyclesCost = 0 ;
357
330
const MachineFunction *MF = From.getParent ();
358
331
for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
359
332
MBBI != End && MBBI != ToI; ++MBBI) {
@@ -372,14 +345,36 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
372
345
if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
373
346
return true ;
374
347
375
- if (!CostModel. isProfitable (MI))
348
+ if (TII-> isWaitcnt (MI. getOpcode () ))
376
349
return true ;
377
350
}
351
+
352
+ if (!MinInstr)
353
+ MinInstr = Traces->getEnsemble (MachineTraceStrategy::TS_Local);
354
+
355
+ MachineTraceMetrics::Trace Trace = MinInstr->getTrace (&From);
356
+ ThenCyclesCost +=
357
+ std::max (Trace.getCriticalPath (), Trace.getResourceDepth (true ));
358
+
359
+ // Consider `P = N/D` to be the probability of execz being false (skipping
360
+ // the then-block). The transformation is profitable if always executing the
361
+ // 'then' block is cheaper than executing sometimes 'then' and always
362
+ // executing s_cbranch_execz:
363
+ // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
364
+ // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
365
+ // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
366
+ // BranchNotTakenCost
367
+ uint64_t Numerator = BranchProb.getNumerator ();
368
+ uint64_t Denominator = BranchProb.getDenominator ();
369
+ bool IsProfitable = (Denominator - Numerator) * ThenCyclesCost <=
370
+ ((Denominator - Numerator) * BranchTakenCost +
371
+ Numerator * BranchNotTakenCost);
372
+ if (!IsProfitable)
373
+ return true ;
378
374
}
379
375
380
376
return false ;
381
377
}
382
- } // namespace
383
378
384
379
// Returns true if the skip branch instruction is removed.
385
380
bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
@@ -414,6 +409,8 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
414
409
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
415
410
TII = ST.getInstrInfo ();
416
411
TRI = &TII->getRegisterInfo ();
412
+ Traces = &getAnalysis<MachineTraceMetrics>();
413
+ MinInstr = nullptr ;
417
414
bool Changed = false ;
418
415
419
416
MF.RenumberBlocks ();
0 commit comments