@@ -137,274 +137,29 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
137
137
if (MFI->isStackRealigned ())
138
138
Info.PrivateSegmentSize += FrameInfo.getMaxAlign ().value ();
139
139
140
- Info.UsesVCC =
141
- MRI.isPhysRegUsed (AMDGPU::VCC_LO) || MRI.isPhysRegUsed (AMDGPU::VCC_HI);
142
-
143
- // If there are no calls, MachineRegisterInfo can tell us the used register
144
- // count easily.
145
- // A tail call isn't considered a call for MachineFrameInfo's purposes.
146
- if (!FrameInfo.hasCalls () && !FrameInfo.hasTailCall ()) {
147
- Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
148
- Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
149
- if (ST.hasMAIInsts ())
150
- Info.NumAGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
151
- return Info;
140
+ Info.UsesVCC = MRI.isPhysRegUsed (AMDGPU::VCC);
141
+
142
+ Info.NumVGPR = TRI.getNumDefinedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
143
+ Info.NumExplicitSGPR =
144
+ TRI.getNumDefinedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
145
+ if (ST.hasMAIInsts ())
146
+ Info.NumAGPR = TRI.getNumDefinedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
147
+
148
+ // Preloaded registers are written by the hardware, not defined in the
149
+ // function body, so they need special handling.
150
+ if (MFI->isEntryFunction ()) {
151
+ Info.NumExplicitSGPR =
152
+ std::max<int32_t >(Info.NumExplicitSGPR , MFI->getNumPreloadedSGPRs ());
153
+ Info.NumVGPR = std::max<int32_t >(Info.NumVGPR , MFI->getNumPreloadedVGPRs ());
152
154
}
153
155
154
- int32_t MaxVGPR = - 1 ;
155
- int32_t MaxAGPR = - 1 ;
156
- int32_t MaxSGPR = - 1 ;
156
+ if (!FrameInfo. hasCalls () && !FrameInfo. hasTailCall ())
157
+ return Info ;
158
+
157
159
Info.CalleeSegmentSize = 0 ;
158
160
159
161
for (const MachineBasicBlock &MBB : MF) {
160
162
for (const MachineInstr &MI : MBB) {
161
- // TODO: Check regmasks? Do they occur anywhere except calls?
162
- for (const MachineOperand &MO : MI.operands ()) {
163
- unsigned Width = 0 ;
164
- bool IsSGPR = false ;
165
- bool IsAGPR = false ;
166
-
167
- if (!MO.isReg ())
168
- continue ;
169
-
170
- Register Reg = MO.getReg ();
171
- switch (Reg) {
172
- case AMDGPU::EXEC:
173
- case AMDGPU::EXEC_LO:
174
- case AMDGPU::EXEC_HI:
175
- case AMDGPU::SCC:
176
- case AMDGPU::M0:
177
- case AMDGPU::M0_LO16:
178
- case AMDGPU::M0_HI16:
179
- case AMDGPU::SRC_SHARED_BASE_LO:
180
- case AMDGPU::SRC_SHARED_BASE:
181
- case AMDGPU::SRC_SHARED_LIMIT_LO:
182
- case AMDGPU::SRC_SHARED_LIMIT:
183
- case AMDGPU::SRC_PRIVATE_BASE_LO:
184
- case AMDGPU::SRC_PRIVATE_BASE:
185
- case AMDGPU::SRC_PRIVATE_LIMIT_LO:
186
- case AMDGPU::SRC_PRIVATE_LIMIT:
187
- case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
188
- case AMDGPU::SGPR_NULL:
189
- case AMDGPU::SGPR_NULL64:
190
- case AMDGPU::MODE:
191
- continue ;
192
-
193
- case AMDGPU::NoRegister:
194
- assert (MI.isDebugInstr () &&
195
- " Instruction uses invalid noreg register" );
196
- continue ;
197
-
198
- case AMDGPU::VCC:
199
- case AMDGPU::VCC_LO:
200
- case AMDGPU::VCC_HI:
201
- case AMDGPU::VCC_LO_LO16:
202
- case AMDGPU::VCC_LO_HI16:
203
- case AMDGPU::VCC_HI_LO16:
204
- case AMDGPU::VCC_HI_HI16:
205
- Info.UsesVCC = true ;
206
- continue ;
207
-
208
- case AMDGPU::FLAT_SCR:
209
- case AMDGPU::FLAT_SCR_LO:
210
- case AMDGPU::FLAT_SCR_HI:
211
- continue ;
212
-
213
- case AMDGPU::XNACK_MASK:
214
- case AMDGPU::XNACK_MASK_LO:
215
- case AMDGPU::XNACK_MASK_HI:
216
- llvm_unreachable (" xnack_mask registers should not be used" );
217
-
218
- case AMDGPU::LDS_DIRECT:
219
- llvm_unreachable (" lds_direct register should not be used" );
220
-
221
- case AMDGPU::TBA:
222
- case AMDGPU::TBA_LO:
223
- case AMDGPU::TBA_HI:
224
- case AMDGPU::TMA:
225
- case AMDGPU::TMA_LO:
226
- case AMDGPU::TMA_HI:
227
- llvm_unreachable (" trap handler registers should not be used" );
228
-
229
- case AMDGPU::SRC_VCCZ:
230
- llvm_unreachable (" src_vccz register should not be used" );
231
-
232
- case AMDGPU::SRC_EXECZ:
233
- llvm_unreachable (" src_execz register should not be used" );
234
-
235
- case AMDGPU::SRC_SCC:
236
- llvm_unreachable (" src_scc register should not be used" );
237
-
238
- default :
239
- break ;
240
- }
241
-
242
- if (AMDGPU::SGPR_32RegClass.contains (Reg) ||
243
- AMDGPU::SGPR_LO16RegClass.contains (Reg) ||
244
- AMDGPU::SGPR_HI16RegClass.contains (Reg)) {
245
- IsSGPR = true ;
246
- Width = 1 ;
247
- } else if (AMDGPU::VGPR_32RegClass.contains (Reg) ||
248
- AMDGPU::VGPR_16RegClass.contains (Reg)) {
249
- IsSGPR = false ;
250
- Width = 1 ;
251
- } else if (AMDGPU::AGPR_32RegClass.contains (Reg) ||
252
- AMDGPU::AGPR_LO16RegClass.contains (Reg)) {
253
- IsSGPR = false ;
254
- IsAGPR = true ;
255
- Width = 1 ;
256
- } else if (AMDGPU::SGPR_64RegClass.contains (Reg)) {
257
- IsSGPR = true ;
258
- Width = 2 ;
259
- } else if (AMDGPU::VReg_64RegClass.contains (Reg)) {
260
- IsSGPR = false ;
261
- Width = 2 ;
262
- } else if (AMDGPU::AReg_64RegClass.contains (Reg)) {
263
- IsSGPR = false ;
264
- IsAGPR = true ;
265
- Width = 2 ;
266
- } else if (AMDGPU::VReg_96RegClass.contains (Reg)) {
267
- IsSGPR = false ;
268
- Width = 3 ;
269
- } else if (AMDGPU::SReg_96RegClass.contains (Reg)) {
270
- IsSGPR = true ;
271
- Width = 3 ;
272
- } else if (AMDGPU::AReg_96RegClass.contains (Reg)) {
273
- IsSGPR = false ;
274
- IsAGPR = true ;
275
- Width = 3 ;
276
- } else if (AMDGPU::SGPR_128RegClass.contains (Reg)) {
277
- IsSGPR = true ;
278
- Width = 4 ;
279
- } else if (AMDGPU::VReg_128RegClass.contains (Reg)) {
280
- IsSGPR = false ;
281
- Width = 4 ;
282
- } else if (AMDGPU::AReg_128RegClass.contains (Reg)) {
283
- IsSGPR = false ;
284
- IsAGPR = true ;
285
- Width = 4 ;
286
- } else if (AMDGPU::VReg_160RegClass.contains (Reg)) {
287
- IsSGPR = false ;
288
- Width = 5 ;
289
- } else if (AMDGPU::SReg_160RegClass.contains (Reg)) {
290
- IsSGPR = true ;
291
- Width = 5 ;
292
- } else if (AMDGPU::AReg_160RegClass.contains (Reg)) {
293
- IsSGPR = false ;
294
- IsAGPR = true ;
295
- Width = 5 ;
296
- } else if (AMDGPU::VReg_192RegClass.contains (Reg)) {
297
- IsSGPR = false ;
298
- Width = 6 ;
299
- } else if (AMDGPU::SReg_192RegClass.contains (Reg)) {
300
- IsSGPR = true ;
301
- Width = 6 ;
302
- } else if (AMDGPU::AReg_192RegClass.contains (Reg)) {
303
- IsSGPR = false ;
304
- IsAGPR = true ;
305
- Width = 6 ;
306
- } else if (AMDGPU::VReg_224RegClass.contains (Reg)) {
307
- IsSGPR = false ;
308
- Width = 7 ;
309
- } else if (AMDGPU::SReg_224RegClass.contains (Reg)) {
310
- IsSGPR = true ;
311
- Width = 7 ;
312
- } else if (AMDGPU::AReg_224RegClass.contains (Reg)) {
313
- IsSGPR = false ;
314
- IsAGPR = true ;
315
- Width = 7 ;
316
- } else if (AMDGPU::SReg_256RegClass.contains (Reg)) {
317
- IsSGPR = true ;
318
- Width = 8 ;
319
- } else if (AMDGPU::VReg_256RegClass.contains (Reg)) {
320
- IsSGPR = false ;
321
- Width = 8 ;
322
- } else if (AMDGPU::AReg_256RegClass.contains (Reg)) {
323
- IsSGPR = false ;
324
- IsAGPR = true ;
325
- Width = 8 ;
326
- } else if (AMDGPU::VReg_288RegClass.contains (Reg)) {
327
- IsSGPR = false ;
328
- Width = 9 ;
329
- } else if (AMDGPU::SReg_288RegClass.contains (Reg)) {
330
- IsSGPR = true ;
331
- Width = 9 ;
332
- } else if (AMDGPU::AReg_288RegClass.contains (Reg)) {
333
- IsSGPR = false ;
334
- IsAGPR = true ;
335
- Width = 9 ;
336
- } else if (AMDGPU::VReg_320RegClass.contains (Reg)) {
337
- IsSGPR = false ;
338
- Width = 10 ;
339
- } else if (AMDGPU::SReg_320RegClass.contains (Reg)) {
340
- IsSGPR = true ;
341
- Width = 10 ;
342
- } else if (AMDGPU::AReg_320RegClass.contains (Reg)) {
343
- IsSGPR = false ;
344
- IsAGPR = true ;
345
- Width = 10 ;
346
- } else if (AMDGPU::VReg_352RegClass.contains (Reg)) {
347
- IsSGPR = false ;
348
- Width = 11 ;
349
- } else if (AMDGPU::SReg_352RegClass.contains (Reg)) {
350
- IsSGPR = true ;
351
- Width = 11 ;
352
- } else if (AMDGPU::AReg_352RegClass.contains (Reg)) {
353
- IsSGPR = false ;
354
- IsAGPR = true ;
355
- Width = 11 ;
356
- } else if (AMDGPU::VReg_384RegClass.contains (Reg)) {
357
- IsSGPR = false ;
358
- Width = 12 ;
359
- } else if (AMDGPU::SReg_384RegClass.contains (Reg)) {
360
- IsSGPR = true ;
361
- Width = 12 ;
362
- } else if (AMDGPU::AReg_384RegClass.contains (Reg)) {
363
- IsSGPR = false ;
364
- IsAGPR = true ;
365
- Width = 12 ;
366
- } else if (AMDGPU::SReg_512RegClass.contains (Reg)) {
367
- IsSGPR = true ;
368
- Width = 16 ;
369
- } else if (AMDGPU::VReg_512RegClass.contains (Reg)) {
370
- IsSGPR = false ;
371
- Width = 16 ;
372
- } else if (AMDGPU::AReg_512RegClass.contains (Reg)) {
373
- IsSGPR = false ;
374
- IsAGPR = true ;
375
- Width = 16 ;
376
- } else if (AMDGPU::SReg_1024RegClass.contains (Reg)) {
377
- IsSGPR = true ;
378
- Width = 32 ;
379
- } else if (AMDGPU::VReg_1024RegClass.contains (Reg)) {
380
- IsSGPR = false ;
381
- Width = 32 ;
382
- } else if (AMDGPU::AReg_1024RegClass.contains (Reg)) {
383
- IsSGPR = false ;
384
- IsAGPR = true ;
385
- Width = 32 ;
386
- } else {
387
- // We only expect TTMP registers or registers that do not belong to
388
- // any RC.
389
- assert ((AMDGPU::TTMP_32RegClass.contains (Reg) ||
390
- AMDGPU::TTMP_64RegClass.contains (Reg) ||
391
- AMDGPU::TTMP_128RegClass.contains (Reg) ||
392
- AMDGPU::TTMP_256RegClass.contains (Reg) ||
393
- AMDGPU::TTMP_512RegClass.contains (Reg) ||
394
- !TRI.getPhysRegBaseClass (Reg)) &&
395
- " Unknown register class" );
396
- }
397
- unsigned HWReg = TRI.getHWRegIndex (Reg);
398
- int MaxUsed = HWReg + Width - 1 ;
399
- if (IsSGPR) {
400
- MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
401
- } else if (IsAGPR) {
402
- MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
403
- } else {
404
- MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
405
- }
406
- }
407
-
408
163
if (MI.isCall ()) {
409
164
// Pseudo used just to encode the underlying global. Is there a better
410
165
// way to track this?
@@ -464,9 +219,5 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
464
219
}
465
220
}
466
221
467
- Info.NumExplicitSGPR = MaxSGPR + 1 ;
468
- Info.NumVGPR = MaxVGPR + 1 ;
469
- Info.NumAGPR = MaxAGPR + 1 ;
470
-
471
222
return Info;
472
223
}
0 commit comments