Skip to content

Commit 13034d1

Browse files
committed
[AMDGPU][AMDGPUIfConverter] First version of AMDGPUIfConversion
1 parent ca6302a commit 13034d1

File tree

5 files changed

+158
-60
lines changed

5 files changed

+158
-60
lines changed

llvm/include/llvm/CodeGen/SSAIfConv.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,11 @@ class SSAIfConv {
8282
SmallVector<MachineOperand, 4> Cond;
8383

8484
struct PredicationStrategyBase {
85-
virtual bool canConvertIf(MachineBasicBlock *Tail) { return true; }
85+
virtual bool canConvertIf(MachineBasicBlock *Head, MachineBasicBlock *TBB,
86+
MachineBasicBlock *FBB, MachineBasicBlock *Tail,
87+
ArrayRef<MachineOperand> Cond) {
88+
return true;
89+
}
8690
virtual bool canPredicateInstr(const MachineInstr &I) = 0;
8791
/// Apply cost model and heuristics to the if-conversion in IfConv.
8892
/// Return true if the conversion is a good idea.

llvm/lib/CodeGen/EarlyIfConversion.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ struct SpeculateStrategy : SSAIfConv::PredicationStrategyBase {
7676
MachineTraceMetrics *Traces = nullptr)
7777
: Loops(Loops), SchedModel(SchedModel), Traces(Traces) {}
7878

79-
bool canConvertIf(MachineBasicBlock *Tail) override {
79+
bool canConvertIf(MachineBasicBlock *Head, MachineBasicBlock *TBB,
80+
MachineBasicBlock *FBB, MachineBasicBlock *Tail,
81+
ArrayRef<MachineOperand> Cond) override {
8082
// This is a triangle or a diamond.
8183
// Skip if we cannot predicate and there are no phis, as there must
8284
// be side effects that can only be handled with predication.

llvm/lib/CodeGen/SSAIfConv.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
249249
return false;
250250
}
251251

252-
if (!Predicate.canConvertIf(Tail)) {
252+
if (!Predicate.canConvertIf(Head, TBB, FBB, Tail, Cond)) {
253253
return false;
254254
}
255255

llvm/lib/Target/AMDGPU/AMDGPUIfConverter.cpp

Lines changed: 147 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,158 @@
1-
#include <llvm/CodeGen/MachineBasicBlock.h>
2-
#include <llvm/CodeGen/MachineBranchProbabilityInfo.h>
3-
#include <llvm/CodeGen/MachineDominators.h>
4-
#include <llvm/CodeGen/MachineFunctionPass.h>
5-
#include <llvm/CodeGen/MachineLoopInfo.h>
6-
#include <llvm/CodeGen/SSAIfConv.h>
7-
#include <llvm/CodeGen/TargetInstrInfo.h>
8-
#include <llvm/CodeGen/TargetRegisterInfo.h>
9-
#include <llvm/CodeGen/TargetSchedule.h>
10-
#include <llvm/CodeGen/TargetSubtargetInfo.h>
11-
#include <llvm/InitializePasses.h>
1+
#include "llvm/CodeGen/MachineFunctionPass.h"
2+
#include "llvm/CodeGen/MachineRegisterInfo.h"
3+
#include "llvm/CodeGen/SSAIfConv.h"
124

135
#include "AMDGPU.h"
6+
#include "GCNSubtarget.h"
7+
#include "SIInstrInfo.h"
8+
#include "SIMachineFunctionInfo.h"
149

1510
using namespace llvm;
1611

17-
namespace {
1812
#define DEBUG_TYPE "amdgpu-if-cvt"
19-
const char PassName[] = "AMDGPU if conversion";
20-
21-
class AMDGPUIfConverter : public MachineFunctionPass {
22-
const TargetInstrInfo *TII = nullptr;
23-
const TargetRegisterInfo *TRI = nullptr;
24-
TargetSchedModel SchedModel;
25-
MachineRegisterInfo *MRI = nullptr;
26-
MachineDominatorTree *DomTree = nullptr;
27-
MachineBranchProbabilityInfo *MBPI = nullptr;
28-
MachineLoopInfo *Loops = nullptr;
29-
30-
static constexpr unsigned BlockInstrLimit = 30;
31-
static constexpr bool Stress = false;
32-
SSAIfConv IfConv{DEBUG_TYPE, BlockInstrLimit, Stress};
33-
34-
public:
35-
static char ID;
3613

37-
AMDGPUIfConverter() : MachineFunctionPass(ID) {}
14+
namespace {
15+
unsigned getReversedVCMPXOpcode(unsigned Opcode) {
16+
// TODO: this is a placeholder for the real function
17+
switch (Opcode) {
18+
case AMDGPU::V_CMPX_LT_I32_nosdst_e64:
19+
return AMDGPU::V_CMPX_GE_I32_nosdst_e64;
20+
default:
21+
errs() << "unhandled: " << Opcode << "\n";
22+
llvm_unreachable("unhandled vcmp opcode");
23+
}
24+
}
3825

39-
bool runOnMachineFunction(MachineFunction &MF) override;
26+
bool needsExecPredication(const SIInstrInfo *TII, const MachineInstr &I) {
27+
return TII->isVALU(I) || TII->isVMEM(I);
28+
}
4029

41-
void getAnalysisUsage(AnalysisUsage &AU) const override;
30+
struct ExecPredicate : SSAIfConv::PredicationStrategyBase {
31+
const SIInstrInfo *TII;
32+
const SIRegisterInfo *RegInfo;
33+
34+
MachineInstr *Cmp = nullptr;
35+
36+
ExecPredicate(const SIInstrInfo *TII)
37+
: TII(TII), RegInfo(&TII->getRegisterInfo()) {}
38+
39+
bool canConvertIf(MachineBasicBlock *Head, MachineBasicBlock *TBB,
40+
MachineBasicBlock *FBB, MachineBasicBlock *Tail,
41+
ArrayRef<MachineOperand> Cond) override {
42+
43+
// check that the cmp is just before the branch and that it is promotable to
44+
// v_cmpx
45+
const unsigned SupportedBranchOpc[]{
46+
AMDGPU::S_CBRANCH_SCC0, AMDGPU::S_CBRANCH_SCC1, AMDGPU::S_CBRANCH_VCCNZ,
47+
AMDGPU::S_CBRANCH_VCCZ};
4248

43-
bool tryConvertIf(MachineBasicBlock *);
49+
MachineInstr &CBranch = *Head->getFirstInstrTerminator();
50+
if (!llvm::is_contained(SupportedBranchOpc, CBranch.getOpcode()))
51+
return false;
52+
53+
auto CmpInstr = std::next(CBranch.getReverseIterator());
54+
if (CmpInstr == Head->instr_rend())
55+
return false;
56+
57+
Register SCCorVCC = Cond[1].getReg();
58+
bool ModifiesConditionReg = CmpInstr->modifiesRegister(SCCorVCC, RegInfo);
59+
if (!ModifiesConditionReg)
60+
return false;
61+
62+
Cmp = &*CmpInstr;
63+
64+
unsigned CmpOpc = Cmp->getOpcode();
65+
if (TII->isSALU(*Cmp))
66+
CmpOpc = TII->getVALUOp(*Cmp);
67+
if (AMDGPU::getVCMPXOpFromVCMP(CmpOpc) == -1) {
68+
errs() << "unhandled branch " << *Cmp << "\n";
69+
return false;
70+
}
71+
72+
return true;
73+
}
74+
75+
bool canPredicateInstr(const MachineInstr &I) override {
76+
77+
// TODO: relax this condition, if exec is masked, check that it goes back to
78+
// normal
79+
// TODO: what about scc or vcc ? Are they taken into acount in the MBB
80+
// live-ins ?
81+
MCRegister Exec = RegInfo->getExec();
82+
bool ModifiesExec = I.modifiesRegister(Exec, RegInfo);
83+
if (ModifiesExec)
84+
return false;
85+
86+
if (needsExecPredication(TII, I))
87+
return true;
88+
89+
bool DontMoveAcrossStore = true;
90+
bool IsSpeculatable = I.isDereferenceableInvariantLoad() ||
91+
I.isSafeToMove(DontMoveAcrossStore);
92+
if (IsSpeculatable)
93+
return true;
94+
95+
return false;
96+
}
4497

98+
bool shouldConvertIf(SSAIfConv &IfConv) override {
99+
// TODO: cost model
100+
return true;
101+
}
102+
103+
void predicateBlock(MachineBasicBlock *MBB, ArrayRef<MachineOperand> Cond,
104+
bool Reverse) override {
105+
// save exec
106+
MachineFunction &MF = *MBB->getParent();
107+
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
108+
109+
Register ExecBackup = MFI->getSGPRForEXECCopy();
110+
111+
const DebugLoc &CmpLoc = Cmp->getDebugLoc();
112+
113+
auto FirstInstruction = MBB->begin();
114+
const bool IsSCCLive =
115+
false; // assume not, since the live-ins are supposed to be empty
116+
TII->insertScratchExecCopy(MF, *MBB, FirstInstruction, CmpLoc, ExecBackup,
117+
IsSCCLive);
118+
119+
// mask exec
120+
unsigned CmpOpc = Cmp->getOpcode();
121+
if (TII->isSALU(*Cmp))
122+
CmpOpc = TII->getVALUOp(*Cmp);
123+
124+
CmpOpc = AMDGPU::getVCMPXOpFromVCMP(CmpOpc);
125+
if (Reverse)
126+
CmpOpc = getReversedVCMPXOpcode(CmpOpc);
127+
128+
// TODO: handle this properly. The second block may kill those registers.
129+
Cmp->getOperand(0).setIsKill(false);
130+
Cmp->getOperand(1).setIsKill(false);
131+
132+
auto VCmpX = BuildMI(*MBB, FirstInstruction, CmpLoc, TII->get(CmpOpc));
133+
VCmpX->addOperand(Cmp->getOperand(0));
134+
VCmpX->addOperand(Cmp->getOperand(1));
135+
136+
// restore exec
137+
TII->restoreExec(MF, *MBB, MBB->end(), DebugLoc(), ExecBackup);
138+
}
139+
140+
~ExecPredicate() override = default;
141+
};
142+
143+
const char PassName[] = "AMDGPU If Conversion";
144+
145+
struct AMDGPUIfConverter : MachineFunctionPass {
146+
static char ID;
147+
AMDGPUIfConverter() : MachineFunctionPass(ID) {}
148+
void getAnalysisUsage(AnalysisUsage &AU) const override;
149+
bool runOnMachineFunction(MachineFunction &MF) override;
45150
StringRef getPassName() const override { return PassName; }
46151
};
47152

48153
char AMDGPUIfConverter::ID = 0;
49154

50155
void AMDGPUIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
51-
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
52156
AU.addRequired<MachineDominatorTreeWrapperPass>();
53157
AU.addPreserved<MachineDominatorTreeWrapperPass>();
54158
AU.addRequired<MachineLoopInfoWrapperPass>();
@@ -60,29 +164,19 @@ bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
60164
if (skipFunction(MF.getFunction()))
61165
return false;
62166

63-
const TargetSubtargetInfo &STI = MF.getSubtarget();
64-
TII = STI.getInstrInfo();
65-
TRI = STI.getRegisterInfo();
66-
MRI = &MF.getRegInfo();
67-
SchedModel.init(&STI);
68-
DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
69-
Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
70-
MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
71-
72-
bool Changed = false;
73-
IfConv.runOnMachineFunction(MF);
167+
const auto &STI = MF.getSubtarget<GCNSubtarget>();
168+
if (!STI.hasGFX10_3Insts())
169+
return false;
74170

75-
for (auto *DomNode : post_order(DomTree))
76-
if (tryConvertIf(DomNode->getBlock()))
77-
Changed = true;
171+
const SIInstrInfo *TII = STI.getInstrInfo();
172+
auto *DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
173+
auto *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
78174

79-
return Changed;
175+
ExecPredicate Predicate(TII);
176+
SSAIfConv IfConv(Predicate, MF, DomTree, Loops);
177+
return IfConv.run();
80178
}
81-
82-
bool AMDGPUIfConverter::tryConvertIf(MachineBasicBlock *MBB) { return false; }
83-
84179
} // namespace
85-
86180
char &llvm::AMDGPUIfConverterID = AMDGPUIfConverter::ID;
87181
INITIALIZE_PASS_BEGIN(AMDGPUIfConverter, DEBUG_TYPE, PassName, false, false)
88182
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)

llvm/test/CodeGen/AMDGPU/amdgpu-if-cvt.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@ define amdgpu_kernel void @scalar_cmp(i32 noundef %value, ptr addrspace(8) nocap
55
; GCN-LABEL: scalar_cmp:
66
; GCN: ; %bb.0: ; %entry
77
; GCN-NEXT: s_load_dword s0, s[2:3], 0x4c
8+
; GCN-NEXT: s_or_saveexec_b32 s105, -1
89
; GCN-NEXT: s_waitcnt lgkmcnt(0)
9-
; GCN-NEXT: s_cmp_lt_i32 s0, 1
10-
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
11-
; GCN-NEXT: ; %bb.1: ; %if.then
10+
; GCN-NEXT: v_cmpx_ge_i32_e64 s0, 1
1211
; GCN-NEXT: s_clause 0x2
1312
; GCN-NEXT: s_load_dword s4, s[2:3], 0x24
1413
; GCN-NEXT: s_load_dword s5, s[2:3], 0x44
@@ -17,7 +16,6 @@ define amdgpu_kernel void @scalar_cmp(i32 noundef %value, ptr addrspace(8) nocap
1716
; GCN-NEXT: v_mov_b32_e32 v0, s4
1817
; GCN-NEXT: v_mov_b32_e32 v1, s5
1918
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
20-
; GCN-NEXT: .LBB0_2: ; %if.end
2119
; GCN-NEXT: s_endpgm
2220
entry:
2321
%cmp = icmp sgt i32 %flag, 0

0 commit comments

Comments
 (0)