11
11
#include < llvm/InitializePasses.h>
12
12
13
13
#include " AMDGPU.h"
14
+ #include " GCNSubtarget.h"
15
+ #include " SIInstrInfo.h"
16
+ #include " SIMachineFunctionInfo.h"
14
17
15
18
using namespace llvm ;
16
19
@@ -19,10 +22,8 @@ namespace {
19
22
const char PassName[] = " AMDGPU if conversion" ;
20
23
21
24
class AMDGPUIfConverter : public MachineFunctionPass {
22
- const TargetInstrInfo *TII = nullptr ;
23
- const TargetRegisterInfo *TRI = nullptr ;
25
+ const SIInstrInfo *TII = nullptr ;
24
26
TargetSchedModel SchedModel;
25
- MachineRegisterInfo *MRI = nullptr ;
26
27
MachineDominatorTree *DomTree = nullptr ;
27
28
MachineBranchProbabilityInfo *MBPI = nullptr ;
28
29
MachineLoopInfo *Loops = nullptr ;
@@ -41,6 +42,7 @@ class AMDGPUIfConverter : public MachineFunctionPass {
41
42
void getAnalysisUsage (AnalysisUsage &AU) const override ;
42
43
43
44
bool tryConvertIf (MachineBasicBlock *);
45
+ bool shouldConvertIf ();
44
46
45
47
StringRef getPassName () const override { return PassName; }
46
48
};
@@ -60,10 +62,11 @@ bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
60
62
if (skipFunction (MF.getFunction ()))
61
63
return false ;
62
64
63
- const TargetSubtargetInfo &STI = MF.getSubtarget ();
65
+ const auto &STI = MF.getSubtarget <GCNSubtarget>();
66
+ if (!STI.hasGFX10_3Insts ())
67
+ return false ;
68
+
64
69
TII = STI.getInstrInfo ();
65
- TRI = STI.getRegisterInfo ();
66
- MRI = &MF.getRegInfo ();
67
70
SchedModel.init (&STI);
68
71
DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree ();
69
72
Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI ();
@@ -79,7 +82,193 @@ bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
79
82
return Changed;
80
83
}
81
84
82
- bool AMDGPUIfConverter::tryConvertIf (MachineBasicBlock *MBB) { return false ; }
85
+ unsigned getReversedVCMPXOpcode (unsigned Opcode) {
86
+ // TODO: this is a placeholder for the real function
87
+ switch (Opcode) {
88
+ case AMDGPU::V_CMPX_LT_I32_nosdst_e64:
89
+ return AMDGPU::V_CMPX_GE_I32_nosdst_e64;
90
+ default :
91
+ errs () << " unhandled: " << Opcode << " \n " ;
92
+ llvm_unreachable (" unhandled vcmp opcode" );
93
+ }
94
+ }
95
+
96
+ bool needsPredication (const SIInstrInfo *TII, const MachineInstr &I) {
97
+ return TII->isVALU (I) || TII->isVMEM (I);
98
+ }
99
+
100
+ struct ExecPredicate : ifcvt::PredicationStrategy {
101
+ const SIInstrInfo *TII;
102
+ const SIRegisterInfo *RegInfo;
103
+
104
+ MachineInstr *Cmp = nullptr ;
105
+
106
+ ExecPredicate (const SIInstrInfo *TII)
107
+ : TII(TII), RegInfo(&TII->getRegisterInfo ()) {}
108
+
109
+ bool canConvertIf (MachineBasicBlock *Head, MachineBasicBlock *TBB,
110
+ MachineBasicBlock *FBB, MachineBasicBlock *Tail,
111
+ ArrayRef<MachineOperand> Cond) override {
112
+
113
+ // check that the cmp is just before the branch and that it is promotable to
114
+ // v_cmpx
115
+ const unsigned SupportedBranchOpc[]{
116
+ AMDGPU::S_CBRANCH_SCC0, AMDGPU::S_CBRANCH_SCC1, AMDGPU::S_CBRANCH_VCCNZ,
117
+ AMDGPU::S_CBRANCH_VCCZ};
118
+
119
+ MachineInstr &CBranch = *Head->getFirstInstrTerminator ();
120
+ if (!llvm::is_contained (SupportedBranchOpc, CBranch.getOpcode ()))
121
+ return false ;
122
+
123
+ auto CmpInstr = std::next (CBranch.getReverseIterator ());
124
+ if (CmpInstr == Head->instr_rend ())
125
+ return false ;
126
+
127
+ Register SCCorVCC = Cond[1 ].getReg ();
128
+ bool ModifiesConditionReg = CmpInstr->modifiesRegister (SCCorVCC, RegInfo);
129
+ if (!ModifiesConditionReg)
130
+ return false ;
131
+
132
+ Cmp = &*CmpInstr;
133
+
134
+ unsigned CmpOpc = Cmp->getOpcode ();
135
+ if (TII->isSALU (*Cmp))
136
+ CmpOpc = TII->getVALUOp (*Cmp);
137
+ if (AMDGPU::getVCMPXOpFromVCMP (CmpOpc) == -1 ) {
138
+ errs () << *Cmp << " \n " ;
139
+ return false ;
140
+ }
141
+
142
+ auto NeedsPredication = [&](const MachineInstr &I) {
143
+ return needsPredication (TII, I);
144
+ };
145
+ auto BlockNeedsPredication = [&](const MachineBasicBlock *MBB) {
146
+ if (MBB == Tail)
147
+ return false ;
148
+ auto Insts = llvm::make_range (MBB->begin (), MBB->getFirstTerminator ());
149
+ return llvm::any_of (Insts, NeedsPredication);
150
+ };
151
+
152
+ MachineBasicBlock *Blocks[] = {TBB, FBB};
153
+
154
+ if (llvm::none_of (Blocks, BlockNeedsPredication))
155
+ return false ;
156
+
157
+ return true ;
158
+ }
159
+
160
+ bool canPredicate (const MachineInstr &I) override {
161
+
162
+ // TODO: relax this condition, if exec is masked, check that it goes back to
163
+ // normal
164
+ // TODO: what about scc or vcc ? Are they taken into acount in the MBB
165
+ // live-ins ?
166
+ MCRegister Exec = RegInfo->getExec ();
167
+ bool ModifiesExec = I.modifiesRegister (Exec, RegInfo);
168
+ if (ModifiesExec)
169
+ return false ;
170
+
171
+ if (needsPredication (TII, I))
172
+ return true ;
173
+
174
+ bool DontMoveAcrossStore = true ;
175
+ bool IsSpeculatable = I.isDereferenceableInvariantLoad () ||
176
+ I.isSafeToMove (DontMoveAcrossStore);
177
+ if (IsSpeculatable)
178
+ return true ;
179
+
180
+ return false ;
181
+ }
182
+
183
+ bool predicateBlock (MachineBasicBlock *MBB, ArrayRef<MachineOperand> Cond,
184
+ bool Reverse) override {
185
+ // save exec
186
+ MachineFunction &MF = *MBB->getParent ();
187
+ SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
188
+
189
+ Register ExecBackup = MFI->getSGPRForEXECCopy ();
190
+
191
+ const DebugLoc &CmpLoc = Cmp->getDebugLoc ();
192
+
193
+ auto FirstInstruction = MBB->begin ();
194
+ const bool IsSCCLive =
195
+ false ; // asume not since the live-ins are supposed to be empty
196
+ TII->insertScratchExecCopy (MF, *MBB, FirstInstruction, CmpLoc, ExecBackup,
197
+ IsSCCLive);
198
+
199
+ // mask exec
200
+ unsigned CmpOpc = Cmp->getOpcode ();
201
+ if (TII->isSALU (*Cmp))
202
+ CmpOpc = TII->getVALUOp (*Cmp);
203
+
204
+ CmpOpc = AMDGPU::getVCMPXOpFromVCMP (CmpOpc);
205
+ if (Reverse)
206
+ CmpOpc = getReversedVCMPXOpcode (CmpOpc);
207
+
208
+ // TODO: handle this properly. The second block may kill those registers.
209
+ Cmp->getOperand (0 ).setIsKill (false );
210
+ Cmp->getOperand (1 ).setIsKill (false );
211
+
212
+ auto VCmpX = BuildMI (*MBB, FirstInstruction, CmpLoc, TII->get (CmpOpc));
213
+ VCmpX->addOperand (Cmp->getOperand (0 ));
214
+ VCmpX->addOperand (Cmp->getOperand (1 ));
215
+
216
+ // restore exec
217
+ TII->restoreExec (MF, *MBB, MBB->end (), DebugLoc (), ExecBackup);
218
+
219
+ return true ;
220
+ }
221
+
222
+ ~ExecPredicate () override = default ;
223
+ };
224
+
225
+ // / Update the dominator tree after if-conversion erased some blocks.
226
+ void updateDomTree (MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
227
+ ArrayRef<MachineBasicBlock *> Removed) {
228
+ // convertIf can remove TBB, FBB, and Tail can be merged into Head.
229
+ // TBB and FBB should not dominate any blocks.
230
+ // Tail children should be transferred to Head.
231
+ MachineDomTreeNode *HeadNode = DomTree->getNode (IfConv.Head );
232
+ for (auto *B : Removed) {
233
+ MachineDomTreeNode *Node = DomTree->getNode (B);
234
+ assert (Node != HeadNode && " Cannot erase the head node" );
235
+ while (Node->getNumChildren ()) {
236
+ assert (Node->getBlock () == IfConv.Tail && " Unexpected children" );
237
+ DomTree->changeImmediateDominator (Node->back (), HeadNode);
238
+ }
239
+ DomTree->eraseNode (B);
240
+ }
241
+ }
242
+
243
+ // / Update LoopInfo after if-conversion.
244
+ void updateLoops (MachineLoopInfo *Loops,
245
+ ArrayRef<MachineBasicBlock *> Removed) {
246
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
247
+ // edges, so updating LoopInfo is simply removing the dead blocks.
248
+ for (auto *B : Removed)
249
+ Loops->removeBlock (B);
250
+ }
251
+
252
+ bool AMDGPUIfConverter::shouldConvertIf () {
253
+ // TODO: cost model
254
+ return true ;
255
+ }
256
+
257
+ bool AMDGPUIfConverter::tryConvertIf (MachineBasicBlock *MBB) {
258
+ ExecPredicate Predicate{TII};
259
+ bool Changed = false ;
260
+ while (IfConv.canConvertIf (MBB, Predicate) && shouldConvertIf ()) {
261
+ // If-convert MBB and update analyses.
262
+ SmallVector<MachineBasicBlock *, 4 > RemoveBlocks;
263
+ IfConv.convertIf (RemoveBlocks, Predicate);
264
+ Changed = true ;
265
+ updateDomTree (DomTree, IfConv, RemoveBlocks);
266
+ for (MachineBasicBlock *MBB : RemoveBlocks)
267
+ MBB->eraseFromParent ();
268
+ updateLoops (Loops, RemoveBlocks);
269
+ }
270
+ return Changed;
271
+ }
83
272
84
273
} // namespace
85
274
0 commit comments