Skip to content

Commit 6fa759e

Browse files
committed
[AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: use BranchProbability and TargetSchedmodel (llvm#109818)
Remove s_cbranch_execz branches if the transformation is profitable according to `BranchProbability` and `TargetSchedModel`.
1 parent c940ca7 commit 6fa759e

21 files changed

+277
-228
lines changed

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,13 @@
1515
#include "GCNSubtarget.h"
1616
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1717
#include "llvm/CodeGen/MachineFunctionPass.h"
18+
#include "llvm/CodeGen/TargetSchedule.h"
19+
#include "llvm/Support/BranchProbability.h"
1820

1921
using namespace llvm;
2022

2123
#define DEBUG_TYPE "si-pre-emit-peephole"
2224

23-
static unsigned SkipThreshold;
24-
25-
static cl::opt<unsigned, true> SkipThresholdFlag(
26-
"amdgpu-skip-threshold", cl::Hidden,
27-
cl::desc(
28-
"Number of instructions before jumping over divergent control flow"),
29-
cl::location(SkipThreshold), cl::init(12));
30-
3125
namespace {
3226

3327
class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4135
MachineBasicBlock *&TrueMBB,
4236
MachineBasicBlock *&FalseMBB,
4337
SmallVectorImpl<MachineOperand> &Cond);
44-
bool mustRetainExeczBranch(const MachineBasicBlock &From,
38+
bool mustRetainExeczBranch(const MachineInstr &Branch,
39+
const MachineBasicBlock &From,
4540
const MachineBasicBlock &To) const;
4641
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
4742

@@ -304,11 +299,61 @@ bool SIPreEmitPeephole::getBlockDestinations(
304299
return true;
305300
}
306301

302+
namespace {
303+
class BranchWeightCostModel {
304+
const SIInstrInfo &TII;
305+
const TargetSchedModel &SchedModel;
306+
BranchProbability BranchProb;
307+
static constexpr uint64_t BranchNotTakenCost = 1;
308+
uint64_t BranchTakenCost;
309+
uint64_t ThenCyclesCost = 0;
310+
311+
public:
312+
BranchWeightCostModel(const SIInstrInfo &TII, const MachineInstr &Branch,
313+
const MachineBasicBlock &Succ)
314+
: TII(TII), SchedModel(TII.getSchedModel()) {
315+
const MachineBasicBlock &Head = *Branch.getParent();
316+
const auto FromIt = find(Head.successors(), &Succ);
317+
assert(FromIt != Head.succ_end());
318+
319+
BranchProb = Head.getSuccProbability(FromIt);
320+
if (BranchProb.isUnknown())
321+
BranchProb = BranchProbability::getZero();
322+
BranchTakenCost = SchedModel.computeInstrLatency(&Branch);
323+
}
324+
325+
bool isProfitable(const MachineInstr &MI) {
326+
if (TII.isWaitcnt(MI.getOpcode()))
327+
return false;
328+
329+
ThenCyclesCost += SchedModel.computeInstrLatency(&MI);
330+
331+
// Consider `P = N/D` to be the probability of execz being false (i.e. the
332+
// branch is not taken and the then-block executes). The transformation is profitable if always executing the
333+
// 'then' block is cheaper than executing sometimes 'then' and always
334+
// executing s_cbranch_execz:
335+
// * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
336+
// * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
337+
// * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
338+
// BranchNotTakenCost
339+
uint64_t Numerator = BranchProb.getNumerator();
340+
uint64_t Denominator = BranchProb.getDenominator();
341+
return (Denominator - Numerator) * ThenCyclesCost <=
342+
((Denominator - Numerator) * BranchTakenCost +
343+
Numerator * BranchNotTakenCost);
344+
}
345+
};
346+
307347
bool SIPreEmitPeephole::mustRetainExeczBranch(
308-
const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309-
unsigned NumInstr = 0;
310-
const MachineFunction *MF = From.getParent();
348+
const MachineInstr &Branch, const MachineBasicBlock &From,
349+
const MachineBasicBlock &To) const {
350+
351+
const MachineBasicBlock &Head = *Branch.getParent();
352+
assert(is_contained(Head.successors(), &From));
353+
354+
BranchWeightCostModel CostModel{*TII, Branch, From};
311355

356+
const MachineFunction *MF = From.getParent();
312357
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
313358
MBBI != End && MBBI != ToI; ++MBBI) {
314359
const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +371,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326371
if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
327372
return true;
328373

329-
// These instructions are potentially expensive even if EXEC = 0.
330-
if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
331-
TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
332-
return true;
333-
334-
++NumInstr;
335-
if (NumInstr >= SkipThreshold)
374+
if (!CostModel.isProfitable(MI))
336375
return true;
337376
}
338377
}
339378

340379
return false;
341380
}
381+
} // namespace
342382

343383
// Returns true if the skip branch instruction is removed.
344384
bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
345385
MachineBasicBlock &SrcMBB) {
386+
387+
if (!TII->getSchedModel().hasInstrSchedModel())
388+
return false;
389+
346390
MachineBasicBlock *TrueMBB = nullptr;
347391
MachineBasicBlock *FalseMBB = nullptr;
348392
SmallVector<MachineOperand, 1> Cond;
@@ -351,8 +395,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351395
return false;
352396

353397
// Consider only the forward branches.
354-
if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
355-
mustRetainExeczBranch(*FalseMBB, *TrueMBB))
398+
if (SrcMBB.getNumber() >= TrueMBB->getNumber())
399+
return false;
400+
401+
// Consider only when it is legal and profitable
402+
if (mustRetainExeczBranch(MI, *FalseMBB, *TrueMBB))
356403
return false;
357404

358405
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);

0 commit comments

Comments
 (0)