15
15
#include " GCNSubtarget.h"
16
16
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
17
17
#include " llvm/CodeGen/MachineFunctionPass.h"
18
+ #include " llvm/CodeGen/TargetSchedule.h"
19
+ #include " llvm/Support/BranchProbability.h"
18
20
19
21
using namespace llvm ;
20
22
21
23
#define DEBUG_TYPE " si-pre-emit-peephole"
22
24
23
- static unsigned SkipThreshold;
24
-
25
- static cl::opt<unsigned , true > SkipThresholdFlag (
26
- " amdgpu-skip-threshold" , cl::Hidden,
27
- cl::desc (
28
- " Number of instructions before jumping over divergent control flow" ),
29
- cl::location(SkipThreshold), cl::init(12 ));
30
-
31
25
namespace {
32
26
33
27
class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
41
35
MachineBasicBlock *&TrueMBB,
42
36
MachineBasicBlock *&FalseMBB,
43
37
SmallVectorImpl<MachineOperand> &Cond);
44
- bool mustRetainExeczBranch (const MachineBasicBlock &From,
38
+ bool mustRetainExeczBranch (const MachineInstr &Branch,
39
+ const MachineBasicBlock &From,
45
40
const MachineBasicBlock &To) const ;
46
41
bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
47
42
@@ -304,11 +299,61 @@ bool SIPreEmitPeephole::getBlockDestinations(
304
299
return true ;
305
300
}
306
301
302
+ namespace {
303
+ class BranchWeightCostModel { // Weighs the accumulated latency of the 'then' block against the cost of keeping the branch.
304
+ const SIInstrInfo &TII;
305
+ const TargetSchedModel &SchedModel;
306
+ BranchProbability BranchProb; // Probability of the Head -> Succ edge; an unknown probability is replaced by zero.
307
+ static constexpr uint64_t BranchNotTakenCost = 1 ; // Fixed cost charged when the branch is not taken.
308
+ uint64_t BranchTakenCost; // Latency of the branch instruction as reported by the scheduling model.
309
+ uint64_t ThenCyclesCost = 0 ; // Running sum of the latencies of every instruction passed to isProfitable().
310
+
311
+ public:
312
+ BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
313
+ const MachineBasicBlock &Succ)
314
+ : TII(TII), SchedModel(TII.getSchedModel()) { // Succ must be a successor of Branch's parent block (asserted below).
315
+ const MachineBasicBlock &Head = *Branch.getParent ();
316
+ const auto FromIt = find (Head.successors (), &Succ);
317
+ assert (FromIt != Head.succ_end ());
318
+
319
+ BranchProb = Head.getSuccProbability (FromIt);
320
+ if (BranchProb.isUnknown ()) // No probability recorded for this edge: fall back to zero.
321
+ BranchProb = BranchProbability::getZero ();
322
+ BranchTakenCost = SchedModel.computeInstrLatency (&Branch);
323
+ }
324
+
325
+ bool isProfitable (const MachineInstr &MI) { // Stateful: adds MI's latency to the running total, then re-evaluates the inequality below.
326
+ if (TII.isWaitcnt (MI.getOpcode ())) // A wait-count instruction is never reported as profitable.
327
+ return false ;
328
+
329
+ ThenCyclesCost += SchedModel.computeInstrLatency (&MI);
330
+
331
+ // Consider `P = N/D` to be the probability of execz being false (i.e. the
332
+ // 'then' block is executed). The transformation is profitable if always
333
+ // executing the 'then' block is cheaper than sometimes executing 'then' and
334
+ // always executing s_cbranch_execz:
335
+ // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
336
+ // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
337
+ // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
338
+ // BranchNotTakenCost
339
+ uint64_t Numerator = BranchProb.getNumerator ();
340
+ uint64_t Denominator = BranchProb.getDenominator ();
341
+ return (Denominator - Numerator) * ThenCyclesCost <= // Last inequality above with both sides multiplied by D, so no division is needed.
342
+ ((Denominator - Numerator) * BranchTakenCost +
343
+ Numerator * BranchNotTakenCost);
344
+ }
345
+ };
346
+
307
347
bool SIPreEmitPeephole::mustRetainExeczBranch (
308
- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309
- unsigned NumInstr = 0 ;
310
- const MachineFunction *MF = From.getParent ();
348
+ const MachineInstr &Branch, const MachineBasicBlock &From,
349
+ const MachineBasicBlock &To) const {
350
+
351
+ const MachineBasicBlock &Head = *Branch.getParent ();
352
+ assert (is_contained (Head.successors (), &From));
353
+
354
+ BranchWeightCostModel CostModel{*TII, Branch, From};
311
355
356
+ const MachineFunction *MF = From.getParent ();
312
357
for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
313
358
MBBI != End && MBBI != ToI; ++MBBI) {
314
359
const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +371,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326
371
if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
327
372
return true ;
328
373
329
- // These instructions are potentially expensive even if EXEC = 0.
330
- if (TII->isSMRD (MI) || TII->isVMEM (MI) || TII->isFLAT (MI) ||
331
- TII->isDS (MI) || TII->isWaitcnt (MI.getOpcode ()))
332
- return true ;
333
-
334
- ++NumInstr;
335
- if (NumInstr >= SkipThreshold)
374
+ if (!CostModel.isProfitable (MI))
336
375
return true ;
337
376
}
338
377
}
339
378
340
379
return false ;
341
380
}
381
+ } // namespace
342
382
343
383
// Returns true if the skip branch instruction is removed.
344
384
bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
345
385
MachineBasicBlock &SrcMBB) {
386
+
387
+ if (!TII->getSchedModel ().hasInstrSchedModel ())
388
+ return false ;
389
+
346
390
MachineBasicBlock *TrueMBB = nullptr ;
347
391
MachineBasicBlock *FalseMBB = nullptr ;
348
392
SmallVector<MachineOperand, 1 > Cond;
@@ -351,8 +395,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351
395
return false ;
352
396
353
397
// Consider only the forward branches.
354
- if ((SrcMBB.getNumber () >= TrueMBB->getNumber ()) ||
355
- mustRetainExeczBranch (*FalseMBB, *TrueMBB))
398
+ if (SrcMBB.getNumber () >= TrueMBB->getNumber ())
399
+ return false ;
400
+
401
+ // Consider only when it is legal and profitable
402
+ if (mustRetainExeczBranch (MI, *FalseMBB, *TrueMBB))
356
403
return false ;
357
404
358
405
LLVM_DEBUG (dbgs () << " Removing the execz branch: " << MI);
0 commit comments