16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " AMDGPU.h"
19
+ #include " AMDGPUGlobalISelUtils.h"
20
+ #include " GCNSubtarget.h"
21
+ #include " llvm/CodeGen/GlobalISel/CSEInfo.h"
22
+ #include " llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
23
+ #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
19
24
#include " llvm/CodeGen/MachineFunctionPass.h"
25
+ #include " llvm/CodeGen/MachineInstr.h"
26
+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
27
+ #include " llvm/CodeGen/TargetPassConfig.h"
20
28
#include " llvm/InitializePasses.h"
21
29
22
30
// Name of this pass as used for debug output and pass registration. The
// literal must not carry leading whitespace, otherwise the name no longer
// matches what is typed on the command line.
#define DEBUG_TYPE "amdgpu-regbankselect"
23
31
24
32
using namespace llvm ;
33
+ using namespace AMDGPU ;
25
34
26
35
namespace {
27
36
@@ -40,6 +49,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
40
49
}
41
50
42
51
/// Declare the analyses that must be available before this pass runs.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  // Requirements added by this pass, in the same order as they are listed in
  // the pass initialization macros.
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addRequired<MachineUniformityAnalysisPass>();

  // Let the base class append its own requirements.
  MachineFunctionPass::getAnalysisUsage(AU);
}
45
57
@@ -55,6 +67,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
55
67
56
68
// Pass registration. The dependency list mirrors the analyses requested in
// getAnalysisUsage(). The description string is user-visible, so it must not
// start with stray whitespace.
INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
                      "AMDGPU Register Bank Select", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
                    "AMDGPU Register Bank Select", false, false)
60
75
@@ -66,9 +81,232 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
66
81
return new AMDGPURegBankSelect ();
67
82
}
68
83
84
+ class RegBankSelectHelper {
85
+ MachineIRBuilder &B;
86
+ MachineRegisterInfo &MRI;
87
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
88
+ const MachineUniformityInfo &MUI;
89
+ const SIRegisterInfo &TRI;
90
+ const RegisterBank *SgprRB;
91
+ const RegisterBank *VgprRB;
92
+ const RegisterBank *VccRB;
93
+
94
+ public:
95
+ RegBankSelectHelper (MachineIRBuilder &B,
96
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
97
+ const MachineUniformityInfo &MUI,
98
+ const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
99
+ : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI),
100
+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
101
+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
102
+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
103
+
104
+ bool shouldRegBankSelect (MachineInstr &MI) {
105
+ return MI.isPreISelOpcode () || MI.isCopy ();
106
+ }
107
+
108
+ // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
109
+ // the cycle
110
+ // Note: uniformity analysis does not consider that registers with vgpr def
111
+ // are divergent (you can have uniform value in vgpr).
112
+ // - TODO: implicit use of $exec could be implemented as indicator that
113
+ // instruction is divergent
114
+ bool isTemporalDivergenceCopy (Register Reg) {
115
+ MachineInstr *MI = MRI.getVRegDef (Reg);
116
+ if (!MI->isCopy ())
117
+ return false ;
118
+
119
+ for (auto Op : MI->implicit_operands ()) {
120
+ if (!Op.isReg ())
121
+ continue ;
122
+
123
+ if (Op.getReg () == TRI.getExec ()) {
124
+ return true ;
125
+ }
126
+ }
127
+
128
+ return false ;
129
+ }
130
+
131
+ void setRBDef (MachineInstr &MI, MachineOperand &DefOP,
132
+ const RegisterBank *RB) {
133
+ Register Reg = DefOP.getReg ();
134
+
135
+ if (!MRI.getRegClassOrNull (Reg)) {
136
+ MRI.setRegBank (Reg, *RB);
137
+ return ;
138
+ }
139
+
140
+ // Register that already has Register class got it during pre-inst selection
141
+ // of another instruction. Maybe cross bank copy was required so we insert a
142
+ // copy that can be removed later. This simplifies post regbanklegalize
143
+ // combiner and avoids need to special case some patterns.
144
+ LLT Ty = MRI.getType (Reg);
145
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
146
+ DefOP.setReg (NewReg);
147
+
148
+ auto &MBB = *MI.getParent ();
149
+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
150
+ B.buildCopy (Reg, NewReg);
151
+
152
+ // The problem was discovered for uniform S1 that was used as both
153
+ // lane mask(vcc) and regular sgpr S1.
154
+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
155
+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
156
+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
157
+ // - the regular sgpr S1(uniform) instruction is now broken since
158
+ // it uses sreg_64_xexec(S1) which is divergent.
159
+
160
+ // Replace virtual registers with register class on generic instructions
161
+ // uses with virtual registers with register bank.
162
+ SmallVector<MachineOperand *, 4 > RegUsesOnGInstrs;
163
+ for (auto &UseMI : MRI.use_instructions (Reg)) {
164
+ if (shouldRegBankSelect (UseMI)) {
165
+ for (MachineOperand &Op : UseMI.operands ()) {
166
+ if (Op.isReg () && Op.getReg () == Reg)
167
+ RegUsesOnGInstrs.push_back (&Op);
168
+ }
169
+ }
170
+ }
171
+ for (MachineOperand *Op : RegUsesOnGInstrs) {
172
+ Op->setReg (NewReg);
173
+ }
174
+ }
175
+
176
+ Register tryGetVReg (MachineOperand &Op) {
177
+ if (!Op.isReg ())
178
+ return {};
179
+
180
+ Register Reg = Op.getReg ();
181
+ if (!Reg.isVirtual ())
182
+ return {};
183
+
184
+ return Reg;
185
+ }
186
+
187
+ void assignBanksOnDefs (MachineInstr &MI) {
188
+ if (!shouldRegBankSelect (MI))
189
+ return ;
190
+
191
+ for (MachineOperand &DefOP : MI.defs ()) {
192
+ Register DefReg = tryGetVReg (DefOP);
193
+ if (!DefReg.isValid ())
194
+ continue ;
195
+
196
+ // Copies can have register class on def registers.
197
+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
198
+ continue ;
199
+ }
200
+
201
+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
202
+ setRBDef (MI, DefOP, SgprRB);
203
+ } else {
204
+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
205
+ setRBDef (MI, DefOP, VccRB);
206
+ else
207
+ setRBDef (MI, DefOP, VgprRB);
208
+ }
209
+ }
210
+ }
211
+
212
+ void constrainRBUse (MachineInstr &MI, MachineOperand &UseOP,
213
+ const RegisterBank *RB) {
214
+ Register Reg = UseOP.getReg ();
215
+
216
+ LLT Ty = MRI.getType (Reg);
217
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
218
+ UseOP.setReg (NewReg);
219
+
220
+ if (MI.isPHI ()) {
221
+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
222
+ MachineBasicBlock *DefMBB = DefMI->getParent ();
223
+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
224
+ } else {
225
+ B.setInstr (MI);
226
+ }
227
+
228
+ B.buildCopy (NewReg, Reg);
229
+ }
230
+
231
+ void constrainBanksOnUses (MachineInstr &MI) {
232
+ if (!shouldRegBankSelect (MI))
233
+ return ;
234
+
235
+ // Copies can have register class on use registers.
236
+ if (MI.isCopy ())
237
+ return ;
238
+
239
+ for (MachineOperand &UseOP : MI.uses ()) {
240
+ auto UseReg = tryGetVReg (UseOP);
241
+ if (!UseReg.isValid ())
242
+ continue ;
243
+
244
+ // UseReg already has register bank.
245
+ if (MRI.getRegBankOrNull (UseReg))
246
+ continue ;
247
+
248
+ if (!isTemporalDivergenceCopy (UseReg) &&
249
+ (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg))) {
250
+ constrainRBUse (MI, UseOP, SgprRB);
251
+ } else {
252
+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
253
+ constrainRBUse (MI, UseOP, VccRB);
254
+ else
255
+ constrainRBUse (MI, UseOP, VgprRB);
256
+ }
257
+ }
258
+ }
259
+ };
260
+
69
261
/// Runs register bank selection on \p MF: first every def of a generic
/// instruction gets a register bank, then every use is constrained to a
/// register bank as well. Returns false without touching the function when
/// instruction selection has already failed, and true otherwise.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (ISelFailed)
    return false;

  // Set up an instruction builder that performs CSE and reports changes
  // through an observer.
  const TargetPassConfig &PassConfig = getAnalysis<TargetPassConfig>();
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  GISelCSEInfo &CSE = CSEWrapper.get(PassConfig.getCSEConfig());
  GISelObserverWrapper ChangeObserver;
  ChangeObserver.addObserver(&CSE);

  CSEMIRBuilder Builder(MF);
  Builder.setCSEInfo(&CSE);
  Builder.setChangeObserver(ChangeObserver);

  // Both installers stay alive until the end of this function.
  RAIIDelegateInstaller DelegateInstaller(MF, &ChangeObserver);
  RAIIMFObserverInstaller MFObserverInstaller(MF, ChangeObserver);

  IntrinsicLaneMaskAnalyzer LaneMasks(MF);
  MachineUniformityInfo &Uniformity =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  RegBankSelectHelper Helper(Builder, LaneMasks, Uniformity,
                             *Subtarget.getRegisterInfo(),
                             *Subtarget.getRegBankInfo());

  // First pass: assign register banks to ALL def registers on G_
  // instructions, and on copies whose def has neither a register bank nor a
  // register class.
  for (MachineBasicBlock &Block : MF)
    for (MachineInstr &Instr : Block)
      Helper.assignBanksOnDefs(Instr);

  // At this point all virtual registers have a register class or bank:
  // - Defs of G_ instructions have register banks.
  // - Defs and uses of inst-selected instructions have a register class.
  // - Defs and uses of copies can have either a register class or bank,
  // and most notably:
  // - Uses of G_ instructions can have either a register class or bank.

  // Second pass: reassign uses of G_ instructions so they only have register
  // banks.
  for (MachineBasicBlock &Block : MF)
    for (MachineInstr &Instr : Block)
      Helper.constrainBanksOnUses(Instr);

  // Defs and uses of G_ instructions now have register banks exclusively.
  return true;
}
0 commit comments