@@ -137,29 +137,274 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
   if (MFI->isStackRealigned())
     Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
 
-  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC);
-
-  Info.NumVGPR = TRI.getNumDefinedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
-  Info.NumExplicitSGPR =
-      TRI.getNumDefinedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
-  if (ST.hasMAIInsts())
-    Info.NumAGPR = TRI.getNumDefinedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
-
-  // Preloaded registers are written by the hardware, not defined in the
-  // function body, so they need special handling.
-  if (MFI->isEntryFunction()) {
-    Info.NumExplicitSGPR =
-        std::max<int32_t>(Info.NumExplicitSGPR, MFI->getNumPreloadedSGPRs());
-    Info.NumVGPR = std::max<int32_t>(Info.NumVGPR, MFI->getNumPreloadedVGPRs());
-  }
-
-  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall())
+  Info.UsesVCC =
+      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
+
+  // If there are no calls, MachineRegisterInfo can tell us the used register
+  // count easily.
+  // A tail call isn't considered a call for MachineFrameInfo's purposes.
+  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
+    Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
+    Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
+    if (ST.hasMAIInsts())
+      Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
     return Info;
+  }
 
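+  // Highest register index seen so far in each file; -1 means no register
+  // of that kind has been used.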
+  int32_t MaxVGPR = -1;
+  int32_t MaxAGPR = -1;
+  int32_t MaxSGPR = -1;
   Info.CalleeSegmentSize = 0;
 
   for (const MachineBasicBlock &MBB : MF) {
     for (const MachineInstr &MI : MBB) {
+      // TODO: Check regmasks? Do they occur anywhere except calls?
+      for (const MachineOperand &MO : MI.operands()) {
+        unsigned Width = 0;
+        bool IsSGPR = false;
+        bool IsAGPR = false;
+
+        if (!MO.isReg())
+          continue;
+
+        Register Reg = MO.getReg();
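+        // Special registers are handled up front: they do not occupy slots
+        // in the SGPR/VGPR/AGPR files counted below, and VCC usage is
+        // tracked separately through Info.UsesVCC.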
+        switch (Reg) {
+        case AMDGPU::EXEC:
+        case AMDGPU::EXEC_LO:
+        case AMDGPU::EXEC_HI:
+        case AMDGPU::SCC:
+        case AMDGPU::M0:
+        case AMDGPU::M0_LO16:
+        case AMDGPU::M0_HI16:
+        case AMDGPU::SRC_SHARED_BASE_LO:
+        case AMDGPU::SRC_SHARED_BASE:
+        case AMDGPU::SRC_SHARED_LIMIT_LO:
+        case AMDGPU::SRC_SHARED_LIMIT:
+        case AMDGPU::SRC_PRIVATE_BASE_LO:
+        case AMDGPU::SRC_PRIVATE_BASE:
+        case AMDGPU::SRC_PRIVATE_LIMIT_LO:
+        case AMDGPU::SRC_PRIVATE_LIMIT:
+        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+        case AMDGPU::SGPR_NULL:
+        case AMDGPU::SGPR_NULL64:
+        case AMDGPU::MODE:
+          continue;
+
+        case AMDGPU::NoRegister:
+          assert(MI.isDebugInstr() &&
+                 "Instruction uses invalid noreg register");
+          continue;
+
+        case AMDGPU::VCC:
+        case AMDGPU::VCC_LO:
+        case AMDGPU::VCC_HI:
+        case AMDGPU::VCC_LO_LO16:
+        case AMDGPU::VCC_LO_HI16:
+        case AMDGPU::VCC_HI_LO16:
+        case AMDGPU::VCC_HI_HI16:
+          Info.UsesVCC = true;
+          continue;
+
+        case AMDGPU::FLAT_SCR:
+        case AMDGPU::FLAT_SCR_LO:
+        case AMDGPU::FLAT_SCR_HI:
+          continue;
+
+        case AMDGPU::XNACK_MASK:
+        case AMDGPU::XNACK_MASK_LO:
+        case AMDGPU::XNACK_MASK_HI:
+          llvm_unreachable("xnack_mask registers should not be used");
+
+        case AMDGPU::LDS_DIRECT:
+          llvm_unreachable("lds_direct register should not be used");
+
+        case AMDGPU::TBA:
+        case AMDGPU::TBA_LO:
+        case AMDGPU::TBA_HI:
+        case AMDGPU::TMA:
+        case AMDGPU::TMA_LO:
+        case AMDGPU::TMA_HI:
+          llvm_unreachable("trap handler registers should not be used");
+
+        case AMDGPU::SRC_VCCZ:
+          llvm_unreachable("src_vccz register should not be used");
+
+        case AMDGPU::SRC_EXECZ:
+          llvm_unreachable("src_execz register should not be used");
+
+        case AMDGPU::SRC_SCC:
+          llvm_unreachable("src_scc register should not be used");
+
+        default:
+          break;
+        }
+
+        if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
+            AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
+            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 1;
+        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
+                   AMDGPU::VGPR_16RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 1;
+        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
+                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 1;
+        } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 2;
+        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 2;
+        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 2;
+        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 3;
+        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 3;
+        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 3;
+        } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 4;
+        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 4;
+        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 4;
+        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 5;
+        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 5;
+        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 5;
+        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 6;
+        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 6;
+        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 6;
+        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 7;
+        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 7;
+        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 7;
+        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 8;
+        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 8;
+        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 8;
+        } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 9;
+        } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 9;
+        } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 9;
+        } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 10;
+        } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 10;
+        } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 10;
+        } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 11;
+        } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 11;
+        } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 11;
+        } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 12;
+        } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 12;
+        } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 12;
+        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 16;
+        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 16;
+        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 16;
+        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 32;
+        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 32;
+        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          IsAGPR = true;
+          Width = 32;
+        } else {
+          // We only expect TTMP registers or registers that do not belong to
+          // any RC.
+          assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_64RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_128RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_256RegClass.contains(Reg) ||
+                  AMDGPU::TTMP_512RegClass.contains(Reg) ||
+                  !TRI.getPhysRegBaseClass(Reg)) &&
+                 "Unknown register class");
+        }
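+        // Record the highest hardware register index this operand touches;
+        // e.g. a use of v[4:7] gives HWReg = 4, Width = 4, MaxUsed = 7.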
+        unsigned HWReg = TRI.getHWRegIndex(Reg);
+        int MaxUsed = HWReg + Width - 1;
+        if (IsSGPR) {
+          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
+        } else if (IsAGPR) {
+          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
+        } else {
+          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
+        }
+      }
+
       if (MI.isCall()) {
         // Pseudo used just to encode the underlying global. Is there a better
         // way to track this?
@@ -219,5 +464,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
     }
   }
 
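+  // Convert the zero-based maximum indices to register counts; -1 becomes 0
+  // when a register file is entirely unused.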
+  Info.NumExplicitSGPR = MaxSGPR + 1;
+  Info.NumVGPR = MaxVGPR + 1;
+  Info.NumAGPR = MaxAGPR + 1;
+
   return Info;
 }