@@ -38,9 +38,6 @@ void LocalScheduler::localScheduling() {
38
38
const Options *m_options = fg.builder ->getOptions ();
39
39
LatencyTable LT (fg.builder );
40
40
41
- PointsToAnalysis p (fg.getKernel ()->Declares , fg.size ());
42
- p.doPointsToAnalysis (fg);
43
-
44
41
uint32_t totalCycles = 0 ;
45
42
uint32_t scheduleStartBBId =
46
43
m_options->getuInt32Option (vISA_LocalSchedulingStartBB);
@@ -86,7 +83,7 @@ void LocalScheduler::localScheduling() {
86
83
G4_BB *tempBB = fg.createNewBB (false );
87
84
sections.push_back (tempBB);
88
85
tempBB->splice (tempBB->begin (), (*ib), (*ib)->begin (), inst_it);
89
- G4_BB_Schedule schedule (fg.getKernel (), bbMem, tempBB, LT, p );
86
+ G4_BB_Schedule schedule (fg.getKernel (), bbMem, tempBB, LT);
90
87
sequentialCycles += schedule.sequentialCycle ;
91
88
sendStallCycles += schedule.sendStallCycle ;
92
89
count = 0 ;
@@ -108,7 +105,7 @@ void LocalScheduler::localScheduling() {
108
105
bbInfo[i].loopNestLevel = (*ib)->getNestLevel ();
109
106
totalCycles += sequentialCycles;
110
107
} else {
111
- G4_BB_Schedule schedule (fg.getKernel (), bbMem, *ib, LT, p );
108
+ G4_BB_Schedule schedule (fg.getKernel (), bbMem, *ib, LT);
112
109
bbInfo[i].id = (*ib)->getId ();
113
110
bbInfo[i].staticCycle = schedule.sequentialCycle ;
114
111
bbInfo[i].sendStallCycle = schedule.sendStallCycle ;
@@ -198,14 +195,15 @@ void G4_BB_Schedule::dumpSchedule(G4_BB *bb) {
198
195
// - creates a new instruction listing within a BB
199
196
//
200
197
G4_BB_Schedule::G4_BB_Schedule (G4_Kernel *k, Mem_Manager &m, G4_BB *block,
201
- const LatencyTable <, PointsToAnalysis &p)
202
- : mem(m), bb(block), kernel(k), pointsToAnalysis(p)
198
+ const LatencyTable <)
199
+ : mem(m), bb(block), kernel(k)
200
+
203
201
{
204
202
// we use local id in the scheduler for determining two instructions' original
205
203
// ordering
206
204
bb->resetLocalIds ();
207
205
208
- DDD ddd (mem, bb, LT, k, p );
206
+ DDD ddd (mem, bb, LT, k);
209
207
// Generate pairs of TypedWrites
210
208
bool doMessageFuse =
211
209
(k->fg .builder ->fuseTypedWrites () && k->getSimdSize () >= g4::SIMD16) ||
@@ -332,44 +330,6 @@ static Mask getMaskForOp(G4_Operand *opnd, Gen4_Operand_Number opnd_num,
332
330
return Mask (LB, RB, nonContiguousStride, opnd->getAccRegSel ());
333
331
}
334
332
335
- void DDD::getBucketsForIndirectOperand (G4_INST *inst,
336
- Gen4_Operand_Number opnd_num,
337
- std::vector<BucketDescr> &BDvec) {
338
- G4_Declare *addrdcl = nullptr ;
339
- G4_Operand *opnd = inst->getOperand (opnd_num);
340
- if (opnd) {
341
- addrdcl = GetTopDclFromRegRegion (opnd);
342
- }
343
- assert (addrdcl != nullptr && " address declare can not be nullptr" );
344
-
345
- auto pointsToSet = pointsToAnalysis.getAllInPointsTo (addrdcl->getRegVar ());
346
- for (auto &pt : *pointsToSet) {
347
- uint32_t varID = pt.var ->getId ();
348
- G4_Declare *dcl = pt.var ->getDeclare ()->getRootDeclare ();
349
- G4_RegVar *var = dcl->getRegVar ();
350
-
351
- assert (var->getId () == varID &&
352
- " RA verification error: Invalid regVar ID!" );
353
- assert (var->getPhyReg ()->isGreg () &&
354
- " RA verification error: Invalid dst reg!" );
355
-
356
- uint32_t regNum = var->getPhyReg ()->asGreg ()->getRegNum ();
357
- uint32_t regOff = var->getPhyRegOff ();
358
- int linearizedStart = regNum * kernel->numEltPerGRF <Type_UB>() +
359
- regOff * TypeSize (dcl->getElemType ());
360
- int linearizedEnd = linearizedStart + dcl->getByteSize () - 1 ;
361
-
362
- int startingBucket = linearizedStart / kernel->numEltPerGRF <Type_UB>();
363
- int endingBucket = linearizedEnd / kernel->numEltPerGRF <Type_UB>();
364
- Mask mask (linearizedStart, linearizedEnd, false , opnd->getAccRegSel ());
365
- int numBuckets = endingBucket - startingBucket + 1 ;
366
- for (int j = startingBucket; j < (startingBucket + numBuckets); j++) {
367
- BDvec.push_back (BucketDescr (j, mask, opnd_num));
368
- }
369
- }
370
- return ;
371
- }
372
-
373
333
void DDD::getBucketsForOperand (G4_INST *inst, Gen4_Operand_Number opnd_num,
374
334
std::vector<BucketDescr> &BDvec) {
375
335
G4_Operand *opnd = inst->getOperand (opnd_num);
@@ -476,7 +436,8 @@ static inline bool hasIndirection(G4_Operand *opnd,
476
436
// return all bucket descriptors that the physical register can map
477
437
// to. This requires taking into account exec size, data
478
438
// type, and whether inst is a send
479
- void DDD::getBucketDescrs (Node *node, std::vector<BucketDescr> &BDvec) {
439
+ bool DDD::getBucketDescrs (Node *node, std::vector<BucketDescr> &BDvec) {
440
+ bool hasIndir = false ;
480
441
for (G4_INST *inst : node->instVec ) {
481
442
// Iterate over all operands and create buckets.
482
443
for (Gen4_Operand_Number opndNum :
@@ -489,9 +450,7 @@ void DDD::getBucketDescrs(Node *node, std::vector<BucketDescr> &BDvec) {
489
450
}
490
451
getBucketsForOperand (inst, opndNum, BDvec);
491
452
// Check if this operand is an indirect access
492
- if (hasIndirection (opnd, opndNum)) {
493
- getBucketsForIndirectOperand (inst, opndNum, BDvec);
494
- }
453
+ hasIndir |= hasIndirection (opnd, opndNum);
495
454
}
496
455
497
456
// Sends need an additional bucket
@@ -504,7 +463,7 @@ void DDD::getBucketDescrs(Node *node, std::vector<BucketDescr> &BDvec) {
504
463
}
505
464
}
506
465
507
- return ;
466
+ return hasIndir ;
508
467
}
509
468
510
469
// This class hides the internals of dependence tracking using buckets
@@ -1235,9 +1194,8 @@ bool DDD::hasSameSourceOneDPAS(G4_INST *curInst, G4_INST *nextInst,
1235
1194
// dependencies with all insts in live set. After analyzing
1236
1195
// dependencies and creating necessary edges, current inst
1237
1196
// is inserted in all buckets it touches.
1238
- DDD::DDD (Mem_Manager &m, G4_BB *bb, const LatencyTable <, G4_Kernel *k,
1239
- PointsToAnalysis &p)
1240
- : mem(m), LT(lt), kernel(k), pointsToAnalysis(p) {
1197
+ DDD::DDD (Mem_Manager &m, G4_BB *bb, const LatencyTable <, G4_Kernel *k)
1198
+ : mem(m), LT(lt), kernel(k) {
1241
1199
Node *lastBarrier = nullptr ;
1242
1200
HWthreadsPerEU = k->getNumThreads ();
1243
1201
useMTLatencies = getBuilder ()->useMultiThreadLatency ();
@@ -1275,6 +1233,7 @@ DDD::DDD(Mem_Manager &m, G4_BB *bb, const LatencyTable <, G4_Kernel *k,
1275
1233
node = new (mem) Node (nodeId, *iInst, depEdgeAllocator, LT);
1276
1234
allNodes.push_back (node);
1277
1235
G4_INST *curInst = node->getInstructions ()->front ();
1236
+ bool hasIndir = false ;
1278
1237
BDvec.clear ();
1279
1238
1280
1239
if (curInst->getNumSrc () == 3 ) {
@@ -1352,8 +1311,9 @@ DDD::DDD(Mem_Manager &m, G4_BB *bb, const LatencyTable <, G4_Kernel *k,
1352
1311
}
1353
1312
}
1354
1313
// Get buckets for all physical registers assigned in curInst
1355
- getBucketDescrs (node, BDvec);
1356
- if (curInst->isSend () && curInst->asSendInst ()->isFence ()) {
1314
+ hasIndir = getBucketDescrs (node, BDvec);
1315
+ if (hasIndir || (curInst->isSend () && curInst->asSendInst ()->isFence ())) {
1316
+ // If inst has indirect src/dst then treat it as a barrier.
1357
1317
node->MarkAsUnresolvedIndirAddressBarrier ();
1358
1318
}
1359
1319
0 commit comments