@@ -116,31 +116,112 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
116
116
<< " , SGPRExcessLimit = " << SGPRExcessLimit << " \n\n " );
117
117
}
118
118
119
+ // / Checks whether \p SU can use the cached DAG pressure diffs to compute the
120
+ // / current register pressure.
121
+ // /
122
+ // / This works for the common case, but it has a few exceptions that have been
123
+ // / observed through trial and error:
124
+ // / - Explicit physical register operands
125
+ // / - Subregister definitions
126
+ // /
127
+ // / In both of those cases, PressureDiff doesn't represent the actual pressure,
128
+ // / and querying LiveIntervals through the RegPressureTracker is needed to get
129
+ // / an accurate value.
130
+ // /
131
+ // / We should eventually only use PressureDiff for maximum performance, but this
132
+ // / already allows 80% of SUs to take the fast path without changing scheduling
133
+ // / at all. Further changes would either change scheduling, or require a lot
134
+ // / more logic to recover an accurate pressure estimate from the PressureDiffs.
135
+ static bool canUsePressureDiffs (const SUnit &SU) {
136
+ if (!SU.isInstr ())
137
+ return false ;
138
+
139
+ // Cannot use pressure diffs for subregister defs or with physregs, it's
140
+ // imprecise in both cases.
141
+ for (const auto &Op : SU.getInstr ()->operands ()) {
142
+ if (!Op.isReg () || Op.isImplicit ())
143
+ continue ;
144
+ if (Op.getReg ().isPhysical () ||
145
+ (Op.isDef () && Op.getSubReg () != AMDGPU::NoSubRegister))
146
+ return false ;
147
+ }
148
+ return true ;
149
+ }
150
+
151
+ static void getRegisterPressures (bool AtTop,
152
+ const RegPressureTracker &RPTracker, SUnit *SU,
153
+ std::vector<unsigned > &Pressure,
154
+ std::vector<unsigned > &MaxPressure) {
155
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
156
+ // the tracker, so we need to pass those function a non-const copy.
157
+ RegPressureTracker &TempTracker = const_cast <RegPressureTracker &>(RPTracker);
158
+ if (AtTop)
159
+ TempTracker.getDownwardPressure (SU->getInstr (), Pressure, MaxPressure);
160
+ else
161
+ TempTracker.getUpwardPressure (SU->getInstr (), Pressure, MaxPressure);
162
+ }
163
+
119
164
void GCNSchedStrategy::initCandidate (SchedCandidate &Cand, SUnit *SU,
120
165
bool AtTop,
121
166
const RegPressureTracker &RPTracker,
122
167
const SIRegisterInfo *SRI,
123
168
unsigned SGPRPressure,
124
- unsigned VGPRPressure) {
169
+ unsigned VGPRPressure, bool IsBottomUp ) {
125
170
Cand.SU = SU;
126
171
Cand.AtTop = AtTop;
127
172
128
173
if (!DAG->isTrackingPressure ())
129
174
return ;
130
175
131
- // getDownwardPressure() and getUpwardPressure() make temporary changes to
132
- // the tracker, so we need to pass those function a non-const copy.
133
- RegPressureTracker &TempTracker = const_cast <RegPressureTracker&>(RPTracker);
134
-
135
176
Pressure.clear ();
136
177
MaxPressure.clear ();
137
178
138
- if (AtTop)
139
- TempTracker.getDownwardPressure (SU->getInstr (), Pressure, MaxPressure);
140
- else {
141
- // FIXME: I think for bottom up scheduling, the register pressure is cached
142
- // and can be retrieved by DAG->getPressureDif(SU).
143
- TempTracker.getUpwardPressure (SU->getInstr (), Pressure, MaxPressure);
179
+ // We try to use the cached PressureDiffs in the ScheduleDAG whenever
180
+ // possible over querying the RegPressureTracker.
181
+ //
182
+ // RegPressureTracker will make a lot of LIS queries which are very
183
+ // expensive, it is considered a slow function in this context.
184
+ //
185
+ // PressureDiffs are precomputed and cached, and getPressureDiff is just a
186
+ // trivial lookup into an array. It is pretty much free.
187
+ //
188
+ // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
189
+ // PressureDiffs.
190
+ if (AtTop || !canUsePressureDiffs (*SU)) {
191
+ getRegisterPressures (AtTop, RPTracker, SU, Pressure, MaxPressure);
192
+ } else {
193
+ // Reserve 4 slots.
194
+ Pressure.resize (4 , 0 );
195
+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
196
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
197
+
198
+ for (const auto &Diff : DAG->getPressureDiff (SU)) {
199
+ if (!Diff.isValid ())
200
+ continue ;
201
+ // PressureDiffs is always bottom-up so if we're working top-down we need
202
+ // to invert its sign.
203
+ Pressure[Diff.getPSet ()] +=
204
+ (IsBottomUp ? Diff.getUnitInc () : -Diff.getUnitInc ());
205
+ }
206
+
207
+ #ifdef EXPENSIVE_CHECKS
208
+ std::vector<unsigned > CheckPressure, CheckMaxPressure;
209
+ getRegisterPressures (AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
210
+ if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
211
+ CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
212
+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
213
+ CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
214
+ errs () << " Register Pressure is inaccurate when calculated through "
215
+ " PressureDiff\n "
216
+ << " SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
217
+ << " , expected "
218
+ << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << " \n "
219
+ << " VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
220
+ << " , expected "
221
+ << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << " \n " ;
222
+ report_fatal_error (" inaccurate register pressure calculation" );
223
+ }
224
+ #endif
144
225
}
145
226
146
227
unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -158,7 +239,6 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
158
239
bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
159
240
bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
160
241
161
-
162
242
// FIXME: We have to enter REG-EXCESS before we reach the actual threshold
163
243
// to increase the likelihood we don't go over the limits. We should improve
164
244
// the analysis to look through dependencies to find the path with the least
@@ -207,7 +287,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
207
287
void GCNSchedStrategy::pickNodeFromQueue (SchedBoundary &Zone,
208
288
const CandPolicy &ZonePolicy,
209
289
const RegPressureTracker &RPTracker,
210
- SchedCandidate &Cand) {
290
+ SchedCandidate &Cand,
291
+ bool IsBottomUp) {
211
292
const SIRegisterInfo *SRI = static_cast <const SIRegisterInfo*>(TRI);
212
293
ArrayRef<unsigned > Pressure = RPTracker.getRegSetPressureAtPos ();
213
294
unsigned SGPRPressure = 0 ;
@@ -220,8 +301,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
220
301
for (SUnit *SU : Q) {
221
302
222
303
SchedCandidate TryCand (ZonePolicy);
223
- initCandidate (TryCand, SU, Zone.isTop (), RPTracker, SRI,
224
- SGPRPressure, VGPRPressure );
304
+ initCandidate (TryCand, SU, Zone.isTop (), RPTracker, SRI, SGPRPressure,
305
+ VGPRPressure, IsBottomUp );
225
306
// Pass SchedBoundary only when comparing nodes from the same boundary.
226
307
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr ;
227
308
tryCandidate (Cand, TryCand, ZoneArg);
@@ -262,15 +343,17 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
262
343
if (!BotCand.isValid () || BotCand.SU ->isScheduled ||
263
344
BotCand.Policy != BotPolicy) {
264
345
BotCand.reset (CandPolicy ());
265
- pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), BotCand);
346
+ pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), BotCand,
347
+ /* IsBottomUp=*/ true );
266
348
assert (BotCand.Reason != NoCand && " failed to find the first candidate" );
267
349
} else {
268
350
LLVM_DEBUG (traceCandidate (BotCand));
269
351
#ifndef NDEBUG
270
352
if (VerifyScheduling) {
271
353
SchedCandidate TCand;
272
354
TCand.reset (CandPolicy ());
273
- pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), TCand);
355
+ pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), TCand,
356
+ /* IsBottomUp=*/ true );
274
357
assert (TCand.SU == BotCand.SU &&
275
358
" Last pick result should correspond to re-picking right now" );
276
359
}
@@ -282,15 +365,17 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
282
365
if (!TopCand.isValid () || TopCand.SU ->isScheduled ||
283
366
TopCand.Policy != TopPolicy) {
284
367
TopCand.reset (CandPolicy ());
285
- pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TopCand);
368
+ pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TopCand,
369
+ /* IsBottomUp=*/ false );
286
370
assert (TopCand.Reason != NoCand && " failed to find the first candidate" );
287
371
} else {
288
372
LLVM_DEBUG (traceCandidate (TopCand));
289
373
#ifndef NDEBUG
290
374
if (VerifyScheduling) {
291
375
SchedCandidate TCand;
292
376
TCand.reset (CandPolicy ());
293
- pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TCand);
377
+ pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TCand,
378
+ /* IsBottomUp=*/ false );
294
379
assert (TCand.SU == TopCand.SU &&
295
380
" Last pick result should correspond to re-picking right now" );
296
381
}
@@ -327,7 +412,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
327
412
if (!SU) {
328
413
CandPolicy NoPolicy;
329
414
TopCand.reset (NoPolicy);
330
- pickNodeFromQueue (Top, NoPolicy, DAG->getTopRPTracker (), TopCand);
415
+ pickNodeFromQueue (Top, NoPolicy, DAG->getTopRPTracker (), TopCand,
416
+ /* IsBottomUp=*/ false );
331
417
assert (TopCand.Reason != NoCand && " failed to find a candidate" );
332
418
SU = TopCand.SU ;
333
419
}
@@ -337,7 +423,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
337
423
if (!SU) {
338
424
CandPolicy NoPolicy;
339
425
BotCand.reset (NoPolicy);
340
- pickNodeFromQueue (Bot, NoPolicy, DAG->getBotRPTracker (), BotCand);
426
+ pickNodeFromQueue (Bot, NoPolicy, DAG->getBotRPTracker (), BotCand,
427
+ /* IsBottomUp=*/ true );
341
428
assert (BotCand.Reason != NoCand && " failed to find a candidate" );
342
429
SU = BotCand.SU ;
343
430
}
0 commit comments