16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " AMDGPU.h"
19
+ #include " AMDGPUGlobalISelUtils.h"
20
+ #include " AMDGPURegisterBankInfo.h"
21
+ #include " GCNSubtarget.h"
22
+ #include " MCTargetDesc/AMDGPUMCTargetDesc.h"
23
+ #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
19
24
#include " llvm/CodeGen/MachineFunctionPass.h"
25
+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
20
26
#include " llvm/InitializePasses.h"
21
27
22
28
#define DEBUG_TYPE " amdgpu-standalone-regbankselect"
@@ -41,6 +47,7 @@ class AMDGPUStandaloneRegBankSelect : public MachineFunctionPass {
41
47
}
42
48
43
49
/// Registers the analyses this pass needs before it can run.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  // runOnMachineFunction fetches MachineUniformityAnalysisPass through
  // getAnalysis<>, so it has to be declared as required here.
  AU.addRequired<MachineUniformityAnalysisPass>();

  // Forward to the base class so it can add its own entries to AU.
  MachineFunctionPass::getAnalysisUsage(AU);
}
46
53
@@ -68,9 +75,220 @@ FunctionPass *llvm::createAMDGPUStandaloneRegBankSelectPass() {
68
75
return new AMDGPUStandaloneRegBankSelect ();
69
76
}
70
77
78
+ class RegBankSelectHelper {
79
+ MachineFunction &MF;
80
+ MachineIRBuilder &B;
81
+ MachineRegisterInfo &MRI;
82
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
83
+ const MachineUniformityInfo &MUI;
84
+ const SIRegisterInfo &TRI;
85
+ const RegisterBank *SgprRB;
86
+ const RegisterBank *VgprRB;
87
+ const RegisterBank *VccRB;
88
+
89
+ public:
90
+ RegBankSelectHelper (MachineFunction &MF, MachineIRBuilder &B,
91
+ MachineRegisterInfo &MRI,
92
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
93
+ const MachineUniformityInfo &MUI,
94
+ const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
95
+ : MF(MF), B(B), MRI(MRI), ILMA(ILMA), MUI(MUI), TRI(TRI),
96
+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
97
+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
98
+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
99
+
100
+ bool shouldRegBankSelect (MachineInstr &MI) {
101
+ return MI.isPreISelOpcode () || MI.isCopy ();
102
+ }
103
+
104
+ void setRBDef (MachineInstr &MI, MachineOperand &DefOP,
105
+ const RegisterBank *RB) {
106
+ Register Reg = DefOP.getReg ();
107
+ // Register that already has Register class got it during pre-inst selection
108
+ // of another instruction. Maybe cross bank copy was required so we insert a
109
+ // copy that can be removed later. This simplifies post-rb-legalize artifact
110
+ // combiner and avoids need to special case some patterns.
111
+ if (MRI.getRegClassOrNull (Reg)) {
112
+ LLT Ty = MRI.getType (Reg);
113
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
114
+ DefOP.setReg (NewReg);
115
+
116
+ auto &MBB = *MI.getParent ();
117
+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
118
+ B.buildCopy (Reg, NewReg);
119
+
120
+ // The problem was discovered for uniform S1 that was used as both
121
+ // lane mask(vcc) and regular sgpr S1.
122
+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
123
+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
124
+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
125
+ // - the regular sgpr S1(uniform) instruction is now broken since
126
+ // it uses sreg_64_xexec(S1) which is divergent.
127
+
128
+ // "Clear" reg classes from uses on generic instructions and put register
129
+ // banks instead.
130
+ for (auto &UseMI : MRI.use_instructions (Reg)) {
131
+ if (shouldRegBankSelect (UseMI)) {
132
+ for (MachineOperand &Op : UseMI.operands ()) {
133
+ if (Op.isReg () && Op.getReg () == Reg)
134
+ Op.setReg (NewReg);
135
+ }
136
+ }
137
+ }
138
+
139
+ } else {
140
+ MRI.setRegBank (Reg, *RB);
141
+ }
142
+ }
143
+
144
+ void constrainRBUse (MachineInstr &MI, MachineOperand &UseOP,
145
+ const RegisterBank *RB) {
146
+ Register Reg = UseOP.getReg ();
147
+
148
+ LLT Ty = MRI.getType (Reg);
149
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
150
+ UseOP.setReg (NewReg);
151
+
152
+ if (MI.isPHI ()) {
153
+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
154
+ MachineBasicBlock *DefMBB = DefMI->getParent ();
155
+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
156
+ } else {
157
+ B.setInstr (MI);
158
+ }
159
+
160
+ B.buildCopy (NewReg, Reg);
161
+ }
162
+
163
+ std::optional<Register> tryGetVReg (MachineOperand &Op) {
164
+ if (!Op.isReg ())
165
+ return std::nullopt;
166
+
167
+ Register Reg = Op.getReg ();
168
+ if (!Reg.isVirtual ())
169
+ return std::nullopt;
170
+
171
+ return Reg;
172
+ }
173
+
174
+ void assignBanksOnDefs () {
175
+ for (MachineBasicBlock &MBB : MF) {
176
+ for (MachineInstr &MI : MBB) {
177
+ if (!shouldRegBankSelect (MI))
178
+ continue ;
179
+
180
+ for (MachineOperand &DefOP : MI.defs ()) {
181
+ auto MaybeDefReg = tryGetVReg (DefOP);
182
+ if (!MaybeDefReg)
183
+ continue ;
184
+ Register DefReg = *MaybeDefReg;
185
+
186
+ // Copies can have register class on def registers.
187
+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
188
+ continue ;
189
+ }
190
+
191
+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
192
+ setRBDef (MI, DefOP, SgprRB);
193
+ } else {
194
+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
195
+ setRBDef (MI, DefOP, VccRB);
196
+ else
197
+ setRBDef (MI, DefOP, VgprRB);
198
+ }
199
+ }
200
+ }
201
+ }
202
+ }
203
+
204
+ // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
205
+ // the cycle
206
+ // Note: uniformity analysis does not consider that registers with vgpr def
207
+ // are divergent (you can have uniform value in vgpr).
208
+ // - TODO: implicit use of $exec could be implemented as indicator that
209
+ // instruction is divergent
210
+ bool isTemporalDivergenceCopy (Register Reg) {
211
+ MachineInstr *MI = MRI.getVRegDef (Reg);
212
+ if (!MI->isCopy ())
213
+ return false ;
214
+
215
+ for (auto Op : MI->implicit_operands ()) {
216
+ if (!Op.isReg ())
217
+ continue ;
218
+
219
+ if (Op.getReg () == TRI.getExec ()) {
220
+ return true ;
221
+ }
222
+ }
223
+
224
+ return false ;
225
+ }
226
+
227
+ void constrainBanksOnUses () {
228
+ for (MachineBasicBlock &MBB : MF) {
229
+ for (MachineInstr &MI : MBB) {
230
+ if (!shouldRegBankSelect (MI))
231
+ continue ;
232
+
233
+ // Copies can have register class on use registers.
234
+ if (MI.isCopy ())
235
+ continue ;
236
+
237
+ for (MachineOperand &UseOP : MI.uses ()) {
238
+ auto MaybeUseReg = tryGetVReg (UseOP);
239
+ if (!MaybeUseReg)
240
+ continue ;
241
+ Register UseReg = *MaybeUseReg;
242
+
243
+ // UseReg already has register bank.
244
+ if (MRI.getRegBankOrNull (UseReg))
245
+ continue ;
246
+
247
+ if (!isTemporalDivergenceCopy (UseReg) &&
248
+ (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg))) {
249
+ constrainRBUse (MI, UseOP, SgprRB);
250
+ } else {
251
+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
252
+ constrainRBUse (MI, UseOP, VccRB);
253
+ else
254
+ constrainRBUse (MI, UseOP, VgprRB);
255
+ }
256
+ }
257
+ }
258
+ }
259
+ }
260
+ };
261
+
71
262
/// Runs register bank selection on \p MF.
///
/// Returns false without touching the function when it carries the FailedISel
/// property; otherwise performs both bank-assignment phases and returns true.
bool AMDGPUStandaloneRegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  const bool SelectionFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (SelectionFailed)
    return false;

  // Gather everything the helper needs.
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  const RegisterBankInfo &BankInfo = *MF.getSubtarget().getRegBankInfo();
  const SIRegisterInfo &SIRegInfo =
      *MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  MachineUniformityInfo &Uniformity =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  AMDGPU::IntrinsicLaneMaskAnalyzer LaneMasks(MF);

  MachineIRBuilder Builder(MF);
  RegBankSelectHelper Helper(MF, Builder, RegInfo, LaneMasks, Uniformity,
                             SIRegInfo, BankInfo);

  // Phase 1: every def register of a G_ instruction gets a register bank.
  // Copies are handled too when their def has neither bank nor class.
  Helper.assignBanksOnDefs();

  // After phase 1 each virtual register carries a register class or a bank:
  // - defs of G_ instructions: bank
  // - defs and uses of already-selected instructions: class
  // - defs and uses of copies: either one
  // - uses of G_ instructions: either one (this is what phase 2 fixes)

  // Phase 2: make uses of G_ instructions refer to registers with banks only.
  Helper.constrainBanksOnUses();

  // From here on G_ instructions have register banks on all defs and uses.
  return true;
}
0 commit comments