15
15
#include " GCNSubtarget.h"
16
16
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
17
17
#include " llvm/CodeGen/MachineFunctionPass.h"
18
+ #include " llvm/CodeGen/MachineTraceMetrics.h"
18
19
#include " llvm/CodeGen/TargetSchedule.h"
20
+ #include " llvm/InitializePasses.h"
19
21
#include " llvm/Support/BranchProbability.h"
20
22
21
23
using namespace llvm ;
@@ -29,6 +31,10 @@ class SIPreEmitPeephole : public MachineFunctionPass {
29
31
const SIInstrInfo *TII = nullptr ;
30
32
const SIRegisterInfo *TRI = nullptr ;
31
33
34
+ // Trace metrics analysis result, used to estimate the number of cycles it
35
+ // takes to execute a block.
36
+ MachineTraceMetrics::Ensemble *Traces;
37
+
32
38
bool optimizeVccBranch (MachineInstr &MI) const ;
33
39
bool optimizeSetGPR (MachineInstr &First, MachineInstr &MI) const ;
34
40
bool getBlockDestinations (MachineBasicBlock &SrcMBB,
@@ -37,9 +43,14 @@ class SIPreEmitPeephole : public MachineFunctionPass {
37
43
SmallVectorImpl<MachineOperand> &Cond);
38
44
bool mustRetainExeczBranch (const MachineInstr &Branch,
39
45
const MachineBasicBlock &From,
40
- const MachineBasicBlock &To) const ;
46
+ const MachineBasicBlock &To);
41
47
bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
42
48
49
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
50
+ AU.addRequired <MachineTraceMetricsWrapperPass>();
51
+ MachineFunctionPass::getAnalysisUsage (AU);
52
+ }
53
+
43
54
public:
44
55
static char ID;
45
56
@@ -52,8 +63,11 @@ class SIPreEmitPeephole : public MachineFunctionPass {
52
63
53
64
} // End anonymous namespace.
54
65
55
- INITIALIZE_PASS (SIPreEmitPeephole, DEBUG_TYPE,
56
- " SI peephole optimizations" , false , false )
66
+ INITIALIZE_PASS_BEGIN (SIPreEmitPeephole, DEBUG_TYPE,
67
+ " SI peephole optimizations" , false , false )
68
+ INITIALIZE_PASS_DEPENDENCY(MachineTraceMetricsWrapperPass)
69
+ INITIALIZE_PASS_END(SIPreEmitPeephole, DEBUG_TYPE, " SI peephole optimizations" ,
70
+ false , false )
57
71
58
72
char SIPreEmitPeephole::ID = 0;
59
73
@@ -299,58 +313,23 @@ bool SIPreEmitPeephole::getBlockDestinations(
299
313
return true ;
300
314
}
301
315
302
- namespace {
303
- class BranchWeightCostModel {
304
- const SIInstrInfo &TII;
305
- const TargetSchedModel &SchedModel;
306
- BranchProbability BranchProb;
307
- static constexpr uint64_t BranchNotTakenCost = 1 ;
308
- uint64_t BranchTakenCost;
309
- uint64_t ThenCyclesCost = 0 ;
316
+ bool SIPreEmitPeephole::mustRetainExeczBranch (const MachineInstr &Branch,
317
+ const MachineBasicBlock &From,
318
+ const MachineBasicBlock &To) {
319
+ assert (is_contained (Branch.getParent ()->successors (), &From));
310
320
311
- public:
312
- BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
313
- const MachineBasicBlock &Succ)
314
- : TII(TII), SchedModel(TII.getSchedModel()) {
315
- const MachineBasicBlock &Head = *Branch.getParent ();
316
- const auto *FromIt = find (Head.successors (), &Succ);
317
- assert (FromIt != Head.succ_end ());
318
-
319
- BranchProb = Head.getSuccProbability (FromIt);
320
- if (BranchProb.isUnknown ())
321
- BranchProb = BranchProbability::getZero ();
322
- BranchTakenCost = SchedModel.computeInstrLatency (&Branch);
323
- }
321
+ const MachineBasicBlock &Head = *Branch.getParent ();
322
+ const auto *FromIt = find (Head.successors (), &From);
323
+ assert (FromIt != Head.succ_end ());
324
324
325
- bool isProfitable ( const MachineInstr &MI) {
326
- if (TII. isWaitcnt (MI. getOpcode () ))
327
- return false ;
325
+ auto BranchProb = Head. getSuccProbability (FromIt);
326
+ if (BranchProb. isUnknown ( ))
327
+ return true ;
328
328
329
- ThenCyclesCost += SchedModel.computeInstrLatency (&MI);
330
-
331
- // Consider `P = N/D` to be the probability of execz being false (skipping
332
- // the then-block) The transformation is profitable if always executing the
333
- // 'then' block is cheaper than executing sometimes 'then' and always
334
- // executing s_cbranch_execz:
335
- // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
336
- // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
337
- // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
338
- // BranchNotTakenCost
339
- uint64_t Numerator = BranchProb.getNumerator ();
340
- uint64_t Denominator = BranchProb.getDenominator ();
341
- return (Denominator - Numerator) * ThenCyclesCost <=
342
- ((Denominator - Numerator) * BranchTakenCost +
343
- Numerator * BranchNotTakenCost);
344
- }
345
- };
329
+ const MachineFunction *MF = From.getParent ();
346
330
347
- bool SIPreEmitPeephole::mustRetainExeczBranch (
348
- const MachineInstr &Branch, const MachineBasicBlock &From,
349
- const MachineBasicBlock &To) const {
350
- assert (is_contained (Branch.getParent ()->successors (), &From));
351
- BranchWeightCostModel CostModel{*TII, Branch, From};
331
+ SmallVector<const MachineBasicBlock *> ThenBlocks;
352
332
353
- const MachineFunction *MF = From.getParent ();
354
333
for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
355
334
MBBI != End && MBBI != ToI; ++MBBI) {
356
335
const MachineBasicBlock &MBB = *MBBI;
@@ -372,14 +351,37 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
372
351
if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
373
352
return true ;
374
353
375
- if (!CostModel. isProfitable (MI))
354
+ if (TII-> isWaitcnt (MI. getOpcode () ))
376
355
return true ;
377
356
}
357
+ ThenBlocks.push_back (&MBB);
378
358
}
379
359
380
- return false ;
360
+ MachineTraceMetrics::Trace Trace = Traces->getTrace (&Head);
361
+ const MCSchedClassDesc *BranchSchedClassDesc =
362
+ TII->getSchedModel ().getMCSchedModel ()->getSchedClassDesc (
363
+ Branch.getDesc ().getSchedClass ());
364
+ unsigned ResourceThenWithoutBranch =
365
+ Trace.getResourceLength (ThenBlocks, {}, {BranchSchedClassDesc});
366
+ unsigned ResourceThenWithBranch = Trace.getResourceLength (ThenBlocks, {}, {});
367
+ unsigned ResourceElseWithBranch = Trace.getResourceLength ({}, {}, {});
368
+
369
+ // Consider `P = N/D` to be the probability of execz being false (entering
370
+ // the then-block). The transformation is profitable if always executing the
371
+ // 'then' block is cheaper than executing sometimes 'then' and always
372
+ // executing s_cbranch_execz:
373
+ // * ThenCost <= P*BranchThenCost + (1-P)*BranchElseCost
374
+ // * D * ThenCost <= N * BranchThenCost + (D - N) * BranchElseCost
375
+ // For the resource length to be equivalent to the number of cycles to execute
376
+ // the block, we assume no data-dependencies between the instructions. This
377
+ // may not be true and should be refined.
378
+ uint64_t Numerator = BranchProb.getNumerator ();
379
+ uint64_t Denominator = BranchProb.getDenominator ();
380
+ bool IsProfitable = Denominator * ResourceThenWithoutBranch <=
381
+ Numerator * ResourceThenWithBranch +
382
+ (Denominator - Numerator) * ResourceElseWithBranch;
383
+ return !IsProfitable;
381
384
}
382
- } // namespace
383
385
384
386
// Returns true if the skip branch instruction is removed.
385
387
bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
@@ -414,6 +416,8 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
414
416
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
415
417
TII = ST.getInstrInfo ();
416
418
TRI = &TII->getRegisterInfo ();
419
+ Traces = getAnalysis<MachineTraceMetricsWrapperPass>().getMTM ().getEnsemble (
420
+ llvm::MachineTraceStrategy::TS_MinInstrCount);
417
421
bool Changed = false ;
418
422
419
423
MF.RenumberBlocks ();
0 commit comments